From 6befd2d50c9305f455e1f4708476832be9ffc749 Mon Sep 17 00:00:00 2001 From: Jakub Polec Date: Fri, 13 Jun 2025 07:25:59 +0200 Subject: [PATCH] Session_01 --- README.md | 99 +- Session_01/.DS_Store | Bin 0 -> 6148 bytes Session_01/Data/BTCUSD-1h-data.csv | 83955 ++++++++++++++++ .../01_creating_dataframes.ipynb | 391 + .../02_basic_operations.ipynb | 523 + .../03_selecting_filtering.ipynb | 593 + .../04_grouping_aggregation.ipynb | 1137 + .../05_adding_modifying_columns.ipynb | 733 + .../06_handling_missing_data.ipynb | 916 + .../07_merging_joining.ipynb | 937 + .../08_sorting_ranking.ipynb | 1408 + .../09_pivot_tables.ipynb | 1978 + .../10_time_series_analysis.ipynb | 1149 + .../11_string_operation.ipynb | 1059 + .../12_data_visualization.ipynb | 1126 + .../13_advanced_data_cleaning.ipynb | 815 + Session_01/ohlcv_analysis.ipynb | 1301 + Session_01/ohlcv_analysis_advanced.ipynb | 1733 + 18 files changed, 99852 insertions(+), 1 deletion(-) create mode 100644 Session_01/.DS_Store create mode 100755 Session_01/Data/BTCUSD-1h-data.csv create mode 100755 Session_01/PandasDataFrame-exmples/01_creating_dataframes.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/02_basic_operations.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/03_selecting_filtering.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/04_grouping_aggregation.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/05_adding_modifying_columns.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/06_handling_missing_data.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/07_merging_joining.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/08_sorting_ranking.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/09_pivot_tables.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/10_time_series_analysis.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/11_string_operation.ipynb create mode 100755 
Session_01/PandasDataFrame-exmples/12_data_visualization.ipynb create mode 100755 Session_01/PandasDataFrame-exmples/13_advanced_data_cleaning.ipynb create mode 100755 Session_01/ohlcv_analysis.ipynb create mode 100755 Session_01/ohlcv_analysis_advanced.ipynb diff --git a/README.md b/README.md index f89f387..d6ac98b 100644 --- a/README.md +++ b/README.md @@ -1 +1,98 @@ -# crypto_bot_training +# Build Your Own Crypto Trading Bot – Course Repository + +Welcome to the private repository for the **"Build Your Own Crypto Trading Bot – Hands-On Course with Alex"** by QuantJourney. + +This repository contains materials, templates, and code samples used during the 6 live sessions held in June 2025. + +> ⚠️ This repository is for registered participants only. + +--- + +## Content Overview + +**Session 1: Foundations & Data Structures** +- Set up Python, IDE, and required libraries +- Pandas basics for financial time series +- Understanding OHLCV format +- Create your first crypto DataFrame with sample data + +**Session 2: Data Acquisition & Exchange Connectivity** +- WebSocket basics for real-time crypto feeds (Binance focus) +- Fail-safe reconnection logic and error handling +- Logging basics for live systems +- Build tools: order flow scanner, liquidation monitor, funding rate tracker + +**Session 3: Data Processing & Technical Analysis** +- API access using CCXT +- Handle rate limits and API error scenarios +- Reconnect & retry mechanisms +- Use pandas-ta to compute SMA, EMA, RSI +- Create your own indicator pipeline + +**Session 4: Strategy Development & Backtesting** +- Overview of strategy types (trend, mean reversion) +- Backtesting with `backtesting.py` +- Compute Sharpe ratio, drawdown, profit factor +- Add position sizing, SL/TP, and walk-forward logic +- Adjust for fees, slippage, and latency + +**Session 5: Bot Architecture & Implementation** +- Bot system design: event-driven vs loop-based +- Core components: order manager, position tracker, error handler 
+- Risk constraints: daily limits, max size +- Logging & monitoring structure +- Write the engine core for your bot + +**Session 6: Live Trading & Deployment** +- API keys and secure credential handling +- Deployment targets: local, VPS, cloud (e.g., Hetzner) +- Running 24/7: restart logic, alerting +- Final bot launch + testing in production +- Send alerts via Telegram or email + +--- + +## 🤖 AI-Enhanced Trading +Bonus section: +- Use ChatGPT/Claude for strategy suggestions +- Integrate AI-based filters or signal generation +- Let LLMs help you refactor and extend your logic + +--- + +## 📁 Repository Structure + +```text +/Session_01/ # Foundations & DataFrame Handling +/Session_02/ # WebSockets & Real-Time Feed Tools +/Session_03/ # Indicators & Analysis +/Session_04/ # Backtesting + Strategy Logic +/Session_05/ # Trading Bot Core Engine +/Session_06/ # Live Deployment and Monitoring +/templates/ # Starter and final bot code +/utils/ # Helper scripts for logging, reconnection, etc. +README.md # You are here +``` + +--- + +## 🛠 Requirements +- Python 3.10+ +- Install dependencies per session in each folder or via a top-level `requirements.txt` (provided) + +--- + +## 📫 Support +You can reach Alex directly at [alex@quantjourney.pro](mailto:alex@quantjourney.pro) for post-course support (1 week included). + +--- + +## ⚠️ Disclaimer +This project is for **educational use only**. No financial advice. Always trade with caution and use proper risk management. + +--- + +Happy coding – and trade smart. 
+ +QuantJourney Team + diff --git a/Session_01/.DS_Store b/Session_01/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c2cdd28e2108e2e3d10d87673f32e971c1067852 GIT binary patch literal 6148 zcmeHKy-veG4EB`_MJ(N5YhHk%Z*WwBxl+3#(GmnhLLv$h7s7GYHR-8`gO1mzE=SYzrf%0} zn&YoB!0#@jg64Ef^W^=_{A)K>H$}0ingx8E^Yrlke3Zt^ALH#+=eIGNAeHuMf_<;4 zgk64~fllZK&oy1pA@m-jmMV;v$?1ltZ=0`>-XVlXxx\n", + "0 Laptop\n", + "1 Phone\n", + "2 Tablet\n", + "3 Laptop\n", + "4 Phone\n", + "Name: Product, dtype: object\n", + "\n", + "Single column with dot notation:\n", + "0 Laptop\n", + "1 Phone\n", + "2 Tablet\n", + "3 Laptop\n", + "4 Phone\n", + "Name: Product, dtype: object\n", + "\n", + "Single column as DataFrame (note the double brackets):\n", + "Type: \n", + " Product\n", + "0 Laptop\n", + "1 Phone\n", + "2 Tablet\n", + "3 Laptop\n", + "4 Phone\n" + ] + } + ], + "source": [ + "# Single column selection (returns Series)\n", + "print(\"Single column (Product) - Returns Series:\")\n", + "product_series = df_sales['Product']\n", + "print(f\"Type: {type(product_series)}\")\n", + "print(product_series.head())\n", + "\n", + "print(\"\\nSingle column with dot notation:\")\n", + "print(df_sales.Product.head())\n", + "\n", + "print(\"\\nSingle column as DataFrame (note the double brackets):\")\n", + "product_df = df_sales[['Product']]\n", + "print(f\"Type: {type(product_df)}\")\n", + "print(product_df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Multiple column selection\n", + "print(\"Multiple columns:\")\n", + "selected_cols = df_sales[['Product', 'Sales', 'Region']]\n", + "print(selected_cols.head())\n", + "\n", + "print(\"\\nUsing a list variable:\")\n", + "columns_to_select = ['Date', 'Salesperson', 'Sales']\n", + "selected_df = df_sales[columns_to_select]\n", + "print(selected_df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "# Column selection with conditions\n", + "print(\"Selecting columns by data type:\")\n", + "numeric_cols = df_sales.select_dtypes(include=[np.number])\n", + "print(\"Numeric columns:\")\n", + "print(numeric_cols.head())\n", + "\n", + "print(\"\\nSelecting columns by name pattern:\")\n", + "# Columns containing 'S'\n", + "s_columns = [col for col in df_sales.columns if 'S' in col]\n", + "print(f\"Columns with 'S': {s_columns}\")\n", + "print(df_sales[s_columns].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Selecting Rows\n", + "\n", + "Different methods to select specific rows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Row selection by index position\n", + "print(\"First row (index 0):\")\n", + "print(df_sales.iloc[0])\n", + "\n", + "print(\"\\nRows 2 to 4 (positions 1, 2, 3):\")\n", + "print(df_sales.iloc[1:4])\n", + "\n", + "print(\"\\nLast 3 rows:\")\n", + "print(df_sales.iloc[-3:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Row selection by label/index\n", + "print(\"Using .loc with index labels:\")\n", + "print(df_sales.loc[0:2]) # Note: includes endpoint with .loc\n", + "\n", + "print(\"\\nSpecific rows by index:\")\n", + "specific_rows = df_sales.loc[[0, 5, 10, 15]]\n", + "print(specific_rows[['Product', 'Sales', 'Region']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Random sampling\n", + "print(\"Random sample of 5 rows:\")\n", + "random_sample = df_sales.sample(n=5, random_state=42)\n", + "print(random_sample[['Product', 'Sales', 'Salesperson']])\n", + "\n", + "print(\"\\nRandom 25% of the data:\")\n", + "percentage_sample = df_sales.sample(frac=0.25, random_state=42)\n", + "print(f\"Sample size: {len(percentage_sample)} rows\")\n", + "print(percentage_sample[['Product', 
'Sales']].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Boolean Indexing and Filtering\n", + "\n", + "Filter data based on conditions using boolean indexing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple boolean conditions\n", + "print(\"Sales greater than 1000:\")\n", + "high_sales = df_sales[df_sales['Sales'] > 1000]\n", + "print(high_sales[['Product', 'Sales', 'Region']])\n", + "\n", + "print(\"\\nSpecific product filter:\")\n", + "laptops_only = df_sales[df_sales['Product'] == 'Laptop']\n", + "print(laptops_only[['Date', 'Sales', 'Salesperson']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Multiple conditions with AND (&)\n", + "print(\"Laptops with sales > 1100:\")\n", + "laptop_high_sales = df_sales[(df_sales['Product'] == 'Laptop') & (df_sales['Sales'] > 1100)]\n", + "print(laptop_high_sales[['Date', 'Product', 'Sales', 'Region']])\n", + "\n", + "print(\"\\nNorth region with commission rate >= 0.10:\")\n", + "north_high_commission = df_sales[(df_sales['Region'] == 'North') & (df_sales['Commission_Rate'] >= 0.10)]\n", + "print(north_high_commission[['Product', 'Sales', 'Commission_Rate', 'Salesperson']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Multiple conditions with OR (|)\n", + "print(\"Laptops OR high sales (>1200):\")\n", + "laptop_or_high = df_sales[(df_sales['Product'] == 'Laptop') | (df_sales['Sales'] > 1200)]\n", + "print(laptop_or_high[['Product', 'Sales', 'Region']])\n", + "\n", + "print(\"\\nNorth OR South regions:\")\n", + "north_or_south = df_sales[(df_sales['Region'] == 'North') | (df_sales['Region'] == 'South')]\n", + "print(north_or_south[['Product', 'Sales', 'Region']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "# Using .isin() for multiple values\n", + "print(\"Products: Laptop or Phone\")\n", + "laptop_phone = df_sales[df_sales['Product'].isin(['Laptop', 'Phone'])]\n", + "print(laptop_phone[['Product', 'Sales', 'Region']].head())\n", + "\n", + "print(\"\\nSpecific salespersons:\")\n", + "selected_salespeople = df_sales[df_sales['Salesperson'].isin(['John', 'Sarah'])]\n", + "print(selected_salespeople[['Salesperson', 'Product', 'Sales']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# NOT conditions using ~\n", + "print(\"NOT Tablets:\")\n", + "not_tablets = df_sales[~(df_sales['Product'] == 'Tablet')]\n", + "print(not_tablets['Product'].value_counts())\n", + "\n", + "print(\"\\nNOT in North region:\")\n", + "not_north = df_sales[~df_sales['Region'].isin(['North'])]\n", + "print(not_north['Region'].value_counts())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Advanced Selection with .loc and .iloc\n", + "\n", + "Powerful selection methods for precise data access." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# .loc for label-based selection\n", + "print(\".loc examples - Label-based selection:\")\n", + "\n", + "# Select specific rows and columns\n", + "print(\"Rows 0-2, specific columns:\")\n", + "result = df_sales.loc[0:2, ['Product', 'Sales', 'Region']]\n", + "print(result)\n", + "\n", + "print(\"\\nAll rows, specific columns:\")\n", + "result = df_sales.loc[:, ['Product', 'Sales']]\n", + "print(result.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# .iloc for position-based selection\n", + "print(\".iloc examples - Position-based selection:\")\n", + "\n", + "# Select by position\n", + "print(\"First 3 rows, first 3 columns:\")\n", + "result = df_sales.iloc[0:3, 0:3]\n", + "print(result)\n", + "\n", + "print(\"\\nEvery other row, specific columns:\")\n", + "result = df_sales.iloc[::2, [1, 2, 3]] # Every 2nd row, columns 1,2,3\n", + "print(result.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Combining boolean indexing with .loc\n", + "print(\"Boolean indexing with .loc:\")\n", + "\n", + "# High sales, specific columns\n", + "high_sales_subset = df_sales.loc[df_sales['Sales'] > 1000, ['Product', 'Sales', 'Salesperson']]\n", + "print(high_sales_subset)\n", + "\n", + "print(\"\\nComplex condition with .loc:\")\n", + "complex_filter = (df_sales['Product'] == 'Laptop') & (df_sales['Region'] == 'North')\n", + "result = df_sales.loc[complex_filter, ['Date', 'Sales', 'Commission_Rate']]\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. String-based Filtering\n", + "\n", + "Filter data based on string patterns and conditions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# String methods for filtering\n", + "print(\"Salesperson names starting with 'J':\")\n", + "j_names = df_sales[df_sales['Salesperson'].str.startswith('J')]\n", + "print(j_names[['Salesperson', 'Product', 'Sales']].head())\n", + "\n", + "print(\"\\nRegions containing 'th':\")\n", + "th_regions = df_sales[df_sales['Region'].str.contains('th')]\n", + "print(th_regions[['Region', 'Product', 'Sales']].head())\n", + "\n", + "print(\"\\nProducts with exactly 5 characters:\")\n", + "five_char_products = df_sales[df_sales['Product'].str.len() == 5]\n", + "print(five_char_products['Product'].unique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Date-based Filtering\n", + "\n", + "Filter data based on date conditions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Date filtering\n", + "print(\"Data from first week of January 2024:\")\n", + "first_week = df_sales[df_sales['Date'] <= '2024-01-07']\n", + "print(first_week[['Date', 'Product', 'Sales']])\n", + "\n", + "print(\"\\nData from specific date range:\")\n", + "date_range = df_sales[(df_sales['Date'] >= '2024-01-10') & (df_sales['Date'] <= '2024-01-15')]\n", + "print(date_range[['Date', 'Product', 'Sales', 'Region']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Using date components\n", + "print(\"Data from weekends (Saturday=5, Sunday=6):\")\n", + "weekends = df_sales[df_sales['Date'].dt.dayofweek >= 5]\n", + "print(weekends[['Date', 'Product', 'Sales']])\n", + "\n", + "print(\"\\nData from specific days of week:\")\n", + "mondays = df_sales[df_sales['Date'].dt.day_name() == 'Monday']\n", + "print(f\"Monday sales: {len(mondays)} records\")\n", + "if len(mondays) > 0:\n", + " print(mondays[['Date', 'Product', 'Sales']].head())" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Query Method\n", + "\n", + "Alternative syntax for filtering using the `.query()` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Using .query() method for cleaner syntax\n", + "print(\"Using .query() method:\")\n", + "\n", + "# Simple condition\n", + "high_sales_query = df_sales.query('Sales > 1000')\n", + "print(f\"High sales records: {len(high_sales_query)}\")\n", + "print(high_sales_query[['Product', 'Sales', 'Region']].head())\n", + "\n", + "print(\"\\nMultiple conditions:\")\n", + "complex_query = df_sales.query('Product == \"Laptop\" and Region == \"North\"')\n", + "print(complex_query[['Date', 'Sales', 'Commission_Rate']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query with variables\n", + "min_sales = 900\n", + "target_region = 'East'\n", + "\n", + "print(\"Query with variables:\")\n", + "var_query = df_sales.query('Sales >= @min_sales and Region == @target_region')\n", + "print(var_query[['Product', 'Sales', 'Region']])\n", + "\n", + "print(\"\\nQuery with list (isin equivalent):\")\n", + "products = ['Laptop', 'Phone']\n", + "list_query = df_sales.query('Product in @products')\n", + "print(f\"Records for {products}: {len(list_query)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Test your filtering and selection skills:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Complex Filtering\n", + "# Find all sales where:\n", + "# - Product is either 'Laptop' or 'Phone'\n", + "# - Sales are above the median\n", + "# - Commission rate is at least 0.10\n", + "# Show only Date, Product, Sales, and Salesperson columns\n", + "\n", + "# Your code here:\n", + "median_sales = 
df_sales['Sales'].median()\n", + "print(f\"Median sales: {median_sales}\")\n", + "\n", + "# complex_filter = ?\n", + "# result = ?\n", + "# print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Date-based Analysis\n", + "# Find sales data for the second week of January 2024\n", + "# Calculate the average sales for that week\n", + "# Show which products were sold and by whom\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Performance Analysis\n", + "# Create a function that finds top performers:\n", + "# - Takes a DataFrame and a percentile (e.g., 0.8 for top 20%)\n", + "# - Returns salespeople whose average sales are in the top percentile\n", + "# - Show their average sales and total number of sales\n", + "\n", + "def find_top_performers(df, percentile=0.8):\n", + " \"\"\"Find top performing salespeople\"\"\"\n", + " # Your code here:\n", + " pass\n", + "\n", + "# Test your function\n", + "# top_performers = find_top_performers(df_sales, 0.8)\n", + "# print(top_performers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Column Selection**: Use `[]` for single/multiple columns, understand Series vs DataFrame return types\n", + "2. **Row Selection**: `.iloc[]` for position-based, `.loc[]` for label-based selection\n", + "3. **Boolean Indexing**: Use `&` (AND), `|` (OR), `~` (NOT) for combining conditions\n", + "4. **Parentheses Matter**: Always wrap individual conditions in parentheses when combining\n", + "5. **`.isin()` Method**: Efficient way to filter for multiple values\n", + "6. **String Methods**: Use `.str` accessor for string-based filtering\n", + "7. **Date Filtering**: Leverage `.dt` accessor for date-based conditions\n", + "8. 
**`.query()` Method**: Alternative syntax for complex filtering\n", + "\n", + "## Common Mistakes to Avoid\n", + "\n", + "- Using `and/or` instead of `&/|` in boolean conditions\n", + "- Forgetting parentheses around conditions\n", + "- Confusing `.loc[]` and `.iloc[]` usage\n", + "- Not handling empty results from filtering\n", + "- Using chained indexing instead of `.loc[]`\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/04_grouping_aggregation.ipynb b/Session_01/PandasDataFrame-exmples/04_grouping_aggregation.ipynb new file mode 100755 index 0000000..3f8ca5f --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/04_grouping_aggregation.ipynb @@ -0,0 +1,1137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 4: Grouping and Aggregation\n", + "\n", + "## Learning Objectives\n", + "- Master the `.groupby()` operation for data aggregation\n", + "- Learn different aggregation functions and methods\n", + "- Understand multi-level grouping and hierarchical indexing\n", + "- Practice custom aggregation functions\n", + "- Explore advanced grouping techniques\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-3\n", + "- Understanding of basic statistical concepts (mean, sum, count, etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset created:\n", + "Shape: (200, 11)\n", + "\n", + "First few rows:\n", + " Date Product Category Sales Quantity Region 
Salesperson \\\n", + "0 2024-01-01 Monitor Accessories 1068 6 West Diana \n", + "1 2024-01-02 Headphones Electronics 918 1 East Alice \n", + "2 2024-01-03 Tablet Accessories 1133 5 North Diana \n", + "3 2024-01-04 Headphones Electronics 1340 9 West Bob \n", + "4 2024-01-05 Headphones Electronics 1150 2 North Eve \n", + "\n", + " Commission_Rate Commission Month Quarter \n", + "0 0.15 160.20 1 1 \n", + "1 0.12 110.16 1 1 \n", + "2 0.08 90.64 1 1 \n", + "3 0.08 107.20 1 1 \n", + "4 0.12 138.00 1 1 \n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime, timedelta\n", + "\n", + "# Create comprehensive sample dataset\n", + "np.random.seed(42)\n", + "n_records = 200\n", + "\n", + "sales_data = {\n", + " 'Date': pd.date_range('2024-01-01', periods=n_records, freq='D'),\n", + " 'Product': np.random.choice(['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones'], n_records),\n", + " 'Category': np.random.choice(['Electronics', 'Accessories'], n_records, p=[0.8, 0.2]),\n", + " 'Sales': np.random.normal(1000, 300, n_records).astype(int),\n", + " 'Quantity': np.random.randint(1, 10, n_records),\n", + " 'Region': np.random.choice(['North', 'South', 'East', 'West'], n_records),\n", + " 'Salesperson': np.random.choice(['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank'], n_records),\n", + " 'Commission_Rate': np.random.choice([0.08, 0.10, 0.12, 0.15], n_records)\n", + "}\n", + "\n", + "df_sales = pd.DataFrame(sales_data)\n", + "df_sales['Sales'] = np.abs(df_sales['Sales']) # Ensure positive values\n", + "df_sales['Commission'] = df_sales['Sales'] * df_sales['Commission_Rate']\n", + "df_sales['Month'] = df_sales['Date'].dt.month\n", + "df_sales['Quarter'] = df_sales['Date'].dt.quarter\n", + "\n", + "print(\"Dataset created:\")\n", + "print(f\"Shape: {df_sales.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_sales.head())" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## 1. Basic GroupBy Operations\n", + "\n", + "Understanding the fundamentals of grouping data." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total sales by product:\n", + "Product\n", + "Headphones 36032\n", + "Laptop 45296\n", + "Monitor 47419\n", + "Phone 36847\n", + "Tablet 34711\n", + "Name: Sales, dtype: int64\n", + "\n", + "Type: \n", + "\n", + "Average sales by region:\n", + "Region\n", + "East 1030.52\n", + "North 1007.14\n", + "South 966.86\n", + "West 999.78\n", + "Name: Sales, dtype: float64\n" + ] + } + ], + "source": [ + "# Simple groupby with single aggregation\n", + "print(\"Total sales by product:\")\n", + "product_sales = df_sales.groupby('Product')['Sales'].sum()\n", + "print(product_sales)\n", + "print(f\"\\nType: {type(product_sales)}\")\n", + "\n", + "print(\"\\nAverage sales by region:\")\n", + "region_avg = df_sales.groupby('Region')['Sales'].mean().round(2)\n", + "print(region_avg)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Multiple statistics for sales by product:\n", + " count sum mean std\n", + "Product \n", + "Headphones 36 36032 1000.89 298.06\n", + "Laptop 43 45296 1053.40 361.78\n", + "Monitor 49 47419 967.73 270.57\n", + "Phone 35 36847 1052.77 323.17\n", + "Tablet 37 34711 938.14 309.20\n", + "\n", + "With custom column names:\n", + " Count Total_Sales Average_Sales Std_Dev\n", + "Product \n", + "Headphones 36 36032 1000.89 298.06\n", + "Laptop 43 45296 1053.40 361.78\n", + "Monitor 49 47419 967.73 270.57\n", + "Phone 35 36847 1052.77 323.17\n", + "Tablet 37 34711 938.14 309.20\n" + ] + } + ], + "source": [ + "# Multiple aggregations on the same column\n", + "print(\"Multiple statistics for sales by product:\")\n", + "product_stats = 
df_sales.groupby('Product')['Sales'].agg(['count', 'sum', 'mean', 'std']).round(2)\n", + "print(product_stats)\n", + "\n", + "print(\"\\nWith custom column names:\")\n", + "product_stats_named = df_sales.groupby('Product')['Sales'].agg([\n", + " ('Count', 'count'),\n", + " ('Total_Sales', 'sum'),\n", + " ('Average_Sales', 'mean'),\n", + " ('Std_Dev', 'std')\n", + "]).round(2)\n", + "print(product_stats_named)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Aggregating multiple columns:\n", + " Sales Quantity Commission \n", + " sum mean count sum mean sum mean\n", + "Product \n", + "Headphones 36032 1000.89 36 178 4.94 4004.08 111.22\n", + "Laptop 45296 1053.40 43 219 5.09 5018.59 116.71\n", + "Monitor 47419 967.73 49 253 5.16 5078.17 103.64\n", + "Phone 36847 1052.77 35 162 4.63 4121.58 117.76\n", + "Tablet 34711 938.14 37 194 5.24 3699.82 100.00\n", + "\n", + "Flattened column names:\n", + " Sales_sum Sales_mean Sales_count Quantity_sum Quantity_mean \\\n", + "Product \n", + "Headphones 36032 1000.89 36 178 4.94 \n", + "Laptop 45296 1053.40 43 219 5.09 \n", + "Monitor 47419 967.73 49 253 5.16 \n", + "Phone 36847 1052.77 35 162 4.63 \n", + "Tablet 34711 938.14 37 194 5.24 \n", + "\n", + " Commission_sum Commission_mean \n", + "Product \n", + "Headphones 4004.08 111.22 \n", + "Laptop 5018.59 116.71 \n", + "Monitor 5078.17 103.64 \n", + "Phone 4121.58 117.76 \n", + "Tablet 3699.82 100.00 \n" + ] + } + ], + "source": [ + "# Groupby with multiple columns and aggregations\n", + "print(\"Aggregating multiple columns:\")\n", + "multi_agg = df_sales.groupby('Product').agg({\n", + " 'Sales': ['sum', 'mean', 'count'],\n", + " 'Quantity': ['sum', 'mean'],\n", + " 'Commission': ['sum', 'mean']\n", + "}).round(2)\n", + "print(multi_agg)\n", + "\n", + "print(\"\\nFlattened column names:\")\n", + "multi_agg_flat = df_sales.groupby('Product').agg({\n", + " 'Sales': 
['sum', 'mean', 'count'],\n", + " 'Quantity': ['sum', 'mean'],\n", + " 'Commission': ['sum', 'mean']\n", + "}).round(2)\n", + "multi_agg_flat.columns = ['_'.join(col).strip() for col in multi_agg_flat.columns.values]\n", + "print(multi_agg_flat.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Multiple Group Columns\n", + "\n", + "Grouping by multiple categorical variables." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sales by Region and Product:\n", + "Region Product \n", + "East Headphones 9791\n", + " Laptop 17001\n", + " Monitor 11728\n", + " Phone 6514\n", + " Tablet 10614\n", + "North Headphones 11527\n", + " Laptop 6514\n", + " Monitor 11273\n", + " Phone 13293\n", + " Tablet 7750\n", + "South Headphones 7131\n", + " Laptop 13003\n", + " Monitor 12007\n", + " Phone 10115\n", + " Tablet 7054\n", + "West Headphones 7583\n", + " Laptop 8778\n", + " Monitor 12411\n", + " Phone 6925\n", + " Tablet 9293\n", + "Name: Sales, dtype: int64\n", + "\n", + "As DataFrame with reset_index():\n", + " Region Product Sales\n", + "0 East Headphones 9791\n", + "1 East Laptop 17001\n", + "2 East Monitor 11728\n", + "3 East Phone 6514\n", + "4 East Tablet 10614\n", + "5 North Headphones 11527\n", + "6 North Laptop 6514\n", + "7 North Monitor 11273\n", + "8 North Phone 13293\n", + "9 North Tablet 7750\n" + ] + } + ], + "source": [ + "# Group by multiple columns\n", + "print(\"Sales by Region and Product:\")\n", + "region_product = df_sales.groupby(['Region', 'Product'])['Sales'].sum().round(2)\n", + "print(region_product)\n", + "\n", + "print(\"\\nAs DataFrame with reset_index():\")\n", + "region_product_df = df_sales.groupby(['Region', 'Product'])['Sales'].sum().reset_index()\n", + "print(region_product_df.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Hierarchical indexing example:\n", + "First 15 entries:\n", + "Region Product Month\n", + "East Headphones 1 2287\n", + " 3 1194\n", + " 4 985\n", + " 5 2030\n", + " 6 883\n", + " 7 2412\n", + " Laptop 1 1585\n", + " 2 3151\n", + " 3 4563\n", + " 4 2966\n", + " 5 919\n", + " 6 2504\n", + " 7 1313\n", + " Monitor 1 4583\n", + " 2 536\n", + "Name: Sales, dtype: int64\n", + "\n", + "Accessing specific groups:\n", + "North region, Laptop sales by month:\n", + "Month\n", + "2 1976\n", + "3 1141\n", + "4 1342\n", + "5 43\n", + "6 844\n", + "7 1168\n", + "Name: Sales, dtype: int64\n", + "\n", + "All North region sales:\n", + "Product Month\n", + "Headphones 1 1769\n", + " 2 1080\n", + " 3 2884\n", + " 4 1460\n", + " 5 4334\n", + "Name: Sales, dtype: int64\n" + ] + } + ], + "source": [ + "# Working with hierarchical index\n", + "print(\"Hierarchical indexing example:\")\n", + "hierarchy = df_sales.groupby(['Region', 'Product', 'Month'])['Sales'].sum()\n", + "print(\"First 15 entries:\")\n", + "print(hierarchy.head(15))\n", + "\n", + "print(\"\\nAccessing specific groups:\")\n", + "print(\"North region, Laptop sales by month:\")\n", + "try:\n", + " north_laptops = hierarchy.loc[('North', 'Laptop')]\n", + " print(north_laptops)\n", + "except KeyError:\n", + " print(\"No data available for North region Laptops\")\n", + "\n", + "print(\"\\nAll North region sales:\")\n", + "try:\n", + " north_all = hierarchy.loc['North']\n", + " print(north_all.head())\n", + "except KeyError:\n", + " print(\"No data available for North region\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unstacking hierarchical data:\n", + "Product Headphones Laptop Monitor Phone Tablet\n", + "Region \n", + "East 9791 17001 11728 6514 10614\n", + "North 11527 6514 11273 13293 7750\n", + "South 7131 13003 12007 10115 7054\n", + "West 7583 8778 
12411 6925 9293\n", + "\n", + "Unstacking different levels:\n", + "Region East North South West\n", + "Product \n", + "Headphones 9791 11527 7131 7583\n", + "Laptop 17001 6514 13003 8778\n", + "Monitor 11728 11273 12007 12411\n", + "Phone 6514 13293 10115 6925\n", + "Tablet 10614 7750 7054 9293\n" + ] + } + ], + "source": [ + "# Unstacking hierarchical data\n", + "print(\"Unstacking hierarchical data:\")\n", + "region_product_pivot = df_sales.groupby(['Region', 'Product'])['Sales'].sum().unstack(fill_value=0)\n", + "print(region_product_pivot)\n", + "\n", + "print(\"\\nUnstacking different levels:\")\n", + "product_region_pivot = df_sales.groupby(['Product', 'Region'])['Sales'].sum().unstack(fill_value=0)\n", + "print(product_region_pivot)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Common Aggregation Functions\n", + "\n", + "Explore the most useful aggregation functions." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comprehensive statistics by salesperson:\n", + " Count Total Mean Median Std Min Max Q25 Q75\n", + "Salesperson \n", + "Alice 35 33468 956.23 929.0 288.45 298 1588 841.00 1089.50\n", + "Bob 36 36427 1011.86 1050.0 314.32 230 1702 802.25 1196.25\n", + "Charlie 37 39529 1068.35 1070.0 329.60 539 1761 806.00 1313.00\n", + "Diana 29 28906 996.76 1068.0 325.84 43 1607 831.00 1179.00\n", + "Eve 34 35134 1033.35 1046.0 323.72 519 1976 775.00 1159.25\n", + "Frank 29 26841 925.55 904.0 296.00 381 1477 745.00 1145.00\n" + ] + } + ], + "source": [ + "# Comprehensive aggregation example\n", + "print(\"Comprehensive statistics by salesperson:\")\n", + "salesperson_stats = df_sales.groupby('Salesperson')['Sales'].agg([\n", + " 'count', # Number of sales\n", + " 'sum', # Total sales\n", + " 'mean', # Average sale\n", + " 'median', # Median sale\n", + " 'std', # Standard deviation\n", + " 'min', # Minimum sale\n", + " 
'max', # Maximum sale\n", + " lambda x: x.quantile(0.25), # 25th percentile\n", + " lambda x: x.quantile(0.75) # 75th percentile\n", + "]).round(2)\n", + "\n", + "# Rename lambda columns\n", + "salesperson_stats.columns = ['Count', 'Total', 'Mean', 'Median', 'Std', 'Min', 'Max', 'Q25', 'Q75']\n", + "print(salesperson_stats)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Monthly sales trends:\n", + " Sales Quantity Commission\n", + " sum mean count sum sum\n", + "Month \n", + "1 31482 1015.55 31 157 3324.78\n", + "2 29854 1029.45 29 153 3437.00\n", + "3 28500 919.35 31 173 3242.74\n", + "4 27043 901.43 30 124 2973.03\n", + "5 31530 1017.10 31 166 3351.57\n", + "6 33686 1122.87 30 147 3770.67\n", + "7 18210 1011.67 18 86 1822.45\n", + "\n", + "Quarterly performance:\n", + " Sales Quantity Salesperson\n", + " sum mean sum nunique\n", + "Quarter \n", + "1 89836 987.21 483 6\n", + "2 92259 1013.84 437 6\n", + "3 18210 1011.67 86 6\n" + ] + } + ], + "source": [ + "# Date-based aggregations\n", + "print(\"Monthly sales trends:\")\n", + "monthly_sales = df_sales.groupby('Month').agg({\n", + " 'Sales': ['sum', 'mean', 'count'],\n", + " 'Quantity': 'sum',\n", + " 'Commission': 'sum'\n", + "}).round(2)\n", + "print(monthly_sales)\n", + "\n", + "print(\"\\nQuarterly performance:\")\n", + "quarterly_sales = df_sales.groupby('Quarter').agg({\n", + " 'Sales': ['sum', 'mean'],\n", + " 'Quantity': 'sum',\n", + " 'Salesperson': 'nunique' # Number of unique salespeople\n", + "}).round(2)\n", + "print(quarterly_sales)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Custom Aggregation Functions\n", + "\n", + "Create your own aggregation functions for specific business logic." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Custom aggregations by product:\n", + " Mean Std_Dev Range High_Value_Count CV\n", + "Product \n", + "Headphones 1000.889 298.055 1309 8 0.298\n", + "Laptop 1053.395 361.778 1933 14 0.343\n", + "Monitor 967.735 270.570 1151 8 0.280\n", + "Phone 1052.771 323.173 1321 11 0.307\n", + "Tablet 938.135 309.205 1314 6 0.330\n" + ] + } + ], + "source": [ + "# Custom aggregation functions\n", + "def sales_range(series):\n", + " \"\"\"Calculate the range of sales values\"\"\"\n", + " return series.max() - series.min()\n", + "\n", + "def high_value_count(series, threshold=1200):\n", + " \"\"\"Count sales above a threshold\"\"\"\n", + " return (series > threshold).sum()\n", + "\n", + "def coefficient_of_variation(series):\n", + " \"\"\"Calculate coefficient of variation (std/mean)\"\"\"\n", + " return series.std() / series.mean() if series.mean() != 0 else 0\n", + "\n", + "print(\"Custom aggregations by product:\")\n", + "custom_agg = df_sales.groupby('Product')['Sales'].agg([\n", + " 'mean',\n", + " 'std',\n", + " sales_range,\n", + " high_value_count,\n", + " coefficient_of_variation\n", + "]).round(3)\n", + "\n", + "custom_agg.columns = ['Mean', 'Std_Dev', 'Range', 'High_Value_Count', 'CV']\n", + "print(custom_agg)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lambda function aggregations:\n", + " Total Average Top_10_Percent Above_Average_Count \\\n", + "Region \n", + "East 55648 1030.519 1416.6 25 \n", + "North 50357 1007.140 1342.1 29 \n", + "South 49310 966.863 1346.0 26 \n", + "West 44990 999.778 1469.6 19 \n", + "\n", + " Sales_Concentration \n", + "Region \n", + "East 0.134 \n", + "North 0.159 \n", + "South 0.160 \n", + "West 0.171 \n" + ] + } + ], + "source": [ + "# Lambda functions for quick 
custom aggregations\n", + "print(\"Lambda function aggregations:\")\n", + "lambda_agg = df_sales.groupby('Region')['Sales'].agg([\n", + " ('Total', 'sum'),\n", + " ('Average', 'mean'),\n", + " ('Top_10_Percent', lambda x: x.quantile(0.9)),\n", + " ('Above_Average_Count', lambda x: (x > x.mean()).sum()),\n", + " ('Sales_Concentration', lambda x: x.nlargest(5).sum() / x.sum()) # Top 5 sales as a fraction of total\n", + "]).round(3)\n", + "print(lambda_agg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Transform and Apply Operations\n", + "\n", + "Learn `.transform()` and `.apply()` for more complex group operations." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transform operations:\n", + "Sample with transform columns:\n", + " Product Sales Product_Avg_Sales Sales_vs_Product_Avg\n", + "0 Monitor 1068 967.734694 100.265306\n", + "1 Headphones 918 1000.888889 -82.888889\n", + "2 Tablet 1133 938.135135 194.864865\n", + "3 Headphones 1340 1000.888889 339.111111\n", + "4 Headphones 1150 1000.888889 149.111111\n", + "\n", + "Ranking within groups:\n", + " Product Sales Sales_Rank_in_Product\n", + "0 Monitor 1068 16.5\n", + "1 Headphones 918 24.0\n", + "2 Tablet 1133 12.0\n", + "3 Headphones 1340 6.0\n", + "4 Headphones 1150 12.0\n", + "5 Phone 1318 7.5\n", + "6 Tablet 799 24.0\n", + "7 Tablet 739 27.0\n", + "8 Tablet 836 22.0\n", + "9 Headphones 619 32.0\n" + ] + } + ], + "source": [ + "# Transform operations - return same size as original\n", + "print(\"Transform operations:\")\n", + "\n", + "# Add group statistics as new columns\n", + "df_transformed = df_sales.copy()\n", + "df_transformed['Product_Avg_Sales'] = df_sales.groupby('Product')['Sales'].transform('mean')\n", + "df_transformed['Region_Total_Sales'] = df_sales.groupby('Region')['Sales'].transform('sum')\n", + "df_transformed['Sales_vs_Product_Avg'] =
df_transformed['Sales'] - df_transformed['Product_Avg_Sales']\n", + "\n", + "print(\"Sample with transform columns:\")\n", + "print(df_transformed[['Product', 'Sales', 'Product_Avg_Sales', 'Sales_vs_Product_Avg']].head())\n", + "\n", + "print(\"\\nRanking within groups:\")\n", + "df_transformed['Sales_Rank_in_Product'] = df_sales.groupby('Product')['Sales'].rank(ascending=False)\n", + "print(df_transformed[['Product', 'Sales', 'Sales_Rank_in_Product']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Apply operations:\n", + " total_sales avg_sales num_transactions top_salesperson \\\n", + "Product \n", + "Headphones 36032 1000.89 36 Diana \n", + "Laptop 45296 1053.40 43 Eve \n", + "Monitor 47419 967.73 49 Alice \n", + "Phone 36847 1052.77 35 Bob \n", + "Tablet 34711 938.14 37 Bob \n", + "\n", + " sales_per_quantity \n", + "Product \n", + "Headphones 375.94 \n", + "Laptop 345.93 \n", + "Monitor 257.01 \n", + "Phone 373.06 \n", + "Tablet 313.06 \n", + "\n", + "Top performing sale in each region:\n", + " Product Sales Salesperson\n", + "Region \n", + "East Laptop 1585 Charlie\n", + "North Laptop 1976 Eve\n", + "South Laptop 1761 Charlie\n", + "West Headphones 1607 Diana\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/cd/drz_5fvd69ddxy7rzw4p3zx80000gn/T/ipykernel_61733/3089764804.py:14: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. 
Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", + " apply_result = df_sales.groupby('Product').apply(group_summary).round(2)\n", + "/var/folders/cd/drz_5fvd69ddxy7rzw4p3zx80000gn/T/ipykernel_61733/3089764804.py:18: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n", + " top_sales_by_region = df_sales.groupby('Region').apply(lambda x: x.loc[x['Sales'].idxmax()])\n" + ] + } + ], + "source": [ + "# Apply operations - can return different structures\n", + "print(\"Apply operations:\")\n", + "\n", + "def group_summary(group):\n", + " \"\"\"Return a summary Series for each group\"\"\"\n", + " return pd.Series({\n", + " 'total_sales': group['Sales'].sum(),\n", + " 'avg_sales': group['Sales'].mean(),\n", + " 'num_transactions': len(group),\n", + " 'top_salesperson': group.loc[group['Sales'].idxmax(), 'Salesperson'],\n", + " 'sales_per_quantity': (group['Sales'] / group['Quantity']).mean()\n", + " })\n", + "\n", + "apply_result = df_sales.groupby('Product').apply(group_summary).round(2)\n", + "print(apply_result)\n", + "\n", + "print(\"\\nTop performing sale in each region:\")\n", + "top_sales_by_region = df_sales.groupby('Region').apply(lambda x: x.loc[x['Sales'].idxmax()])\n", + "print(top_sales_by_region[['Product', 'Sales', 'Salesperson']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Filtering Groups\n", + "\n", + "Filter entire groups based on group-level conditions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Groups with more than 30 transactions:\n", + "Original data: 200 rows\n", + "Filtered data: 200 rows\n", + "\n", + "Product transaction counts in filtered data:\n", + "Product\n", + "Monitor 49\n", + "Laptop 43\n", + "Tablet 37\n", + "Headphones 36\n", + "Phone 35\n", + "Name: count, dtype: int64\n", + "\n", + "Groups with average sales > $1000:\n", + "High-value products:\n", + "Product\n", + "Headphones 1000.89\n", + "Laptop 1053.40\n", + "Phone 1052.77\n", + "Name: Sales, dtype: float64\n" + ] + } + ], + "source": [ + "# Filter groups based on group characteristics\n", + "print(\"Groups with more than 30 transactions:\")\n", + "active_products = df_sales.groupby('Product').filter(lambda x: len(x) > 30)\n", + "print(f\"Original data: {len(df_sales)} rows\")\n", + "print(f\"Filtered data: {len(active_products)} rows\")\n", + "print(\"\\nProduct transaction counts in filtered data:\")\n", + "print(active_products['Product'].value_counts())\n", + "\n", + "print(\"\\nGroups with average sales > $1000:\")\n", + "high_value_products = df_sales.groupby('Product').filter(lambda x: x['Sales'].mean() > 1000)\n", + "print(\"High-value products:\")\n", + "print(high_value_products.groupby('Product')['Sales'].mean().round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Salespeople with consistent performance:\n", + "Consistent performers analysis:\n", + " Count Mean Std CV\n", + "Salesperson \n", + "Alice 35 956.229 288.455 0.302\n", + "Bob 36 1011.861 314.322 0.311\n", + "Charlie 37 1068.351 329.602 0.309\n", + "Diana 29 996.759 325.836 0.327\n", + "Eve 34 1033.353 323.720 0.313\n", + "Frank 29 925.552 296.002 0.320\n" + ] + } + ], + "source": [ + "# Complex filtering conditions\n", + 
"print(\"Salespeople with consistent performance:\")\n", + "# Filter salespeople with at least 20 sales and CV < 0.5\n", + "consistent_performers = df_sales.groupby('Salesperson').filter(\n", + " lambda x: len(x) >= 20 and (x['Sales'].std() / x['Sales'].mean()) < 0.5\n", + ")\n", + "\n", + "if len(consistent_performers) > 0:\n", + " print(\"Consistent performers analysis:\")\n", + " consistency_analysis = consistent_performers.groupby('Salesperson')['Sales'].agg([\n", + " 'count', 'mean', 'std', lambda x: x.std()/x.mean()\n", + " ]).round(3)\n", + " consistency_analysis.columns = ['Count', 'Mean', 'Std', 'CV']\n", + " print(consistency_analysis)\n", + "else:\n", + " print(\"No salespeople meet the consistency criteria\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Advanced Grouping Techniques\n", + "\n", + "More sophisticated grouping operations." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Grouping by sales value ranges:\n", + " Sales Quantity Commission\n", + " count mean sum sum sum\n", + "Sales_Category \n", + "Low 8 330.88 2647 54 272.61\n", + "Medium 92 788.87 72576 478 8110.68\n", + "High 89 1202.89 107057 423 11652.60\n", + "Very High 11 1638.64 18025 51 1886.35\n", + "\n", + "Product distribution across sales categories:\n", + "Product Headphones Laptop Monitor Phone Tablet\n", + "Sales_Category \n", + "Low 1 2 1 2 2\n", + "Medium 18 17 26 12 19\n", + "High 16 21 21 17 14\n", + "Very High 1 3 1 4 2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/cd/drz_5fvd69ddxy7rzw4p3zx80000gn/T/ipykernel_61733/1556724418.py:8: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " sales_category_analysis = df_sales.groupby('Sales_Category').agg({\n" + ] + } + ], + "source": [ + "# Groupby with categorical cuts\n", + "print(\"Grouping by sales value ranges:\")\n", + "# Create sales categories\n", + "df_sales['Sales_Category'] = pd.cut(df_sales['Sales'], \n", + " bins=[0, 500, 1000, 1500, float('inf')],\n", + " labels=['Low', 'Medium', 'High', 'Very High'])\n", + "\n", + "sales_category_analysis = df_sales.groupby('Sales_Category').agg({\n", + " 'Sales': ['count', 'mean', 'sum'],\n", + " 'Quantity': 'sum',\n", + " 'Commission': 'sum'\n", + "}).round(2)\n", + "print(sales_category_analysis)\n", + "\n", + "print(\"\\nProduct distribution across sales categories:\")\n", + "category_product_cross = pd.crosstab(df_sales['Sales_Category'], df_sales['Product'])\n", + "print(category_product_cross)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Weekly sales analysis:\n", + " Sales Product Salesperson\n", + " sum mean count nunique\n", + "Week \n", + "1 7726 1103.71 7 Headphones 4\n", + "2 6078 868.29 7 Tablet 4\n", + "3 7281 1040.14 7 Monitor 5\n", + "4 6867 981.00 7 Laptop 4\n", + "5 7285 1040.71 7 Monitor 4\n", + "6 7994 1142.00 7 Headphones 4\n", + "7 6652 950.29 7 Phone 3\n", + "8 7125 1017.86 7 Monitor 4\n", + "9 6293 899.00 7 Monitor 5\n", + "10 7755 1107.86 7 Phone 4\n", + "\n", + "Day of week analysis:\n", + " count mean sum\n", + "DayOfWeek \n", + "Monday 29 1017.90 29519\n", + "Tuesday 29 1003.07 29089\n", + "Wednesday 29 956.72 27745\n", + "Thursday 29 963.62 27945\n", + "Friday 28 1136.71 31828\n", + "Saturday 28 1018.32 28513\n", + "Sunday 28 916.64 25666\n" + ] + } + ], + "source": [ + "# Time-based grouping\n", + "print(\"Weekly sales analysis:\")\n", + "df_sales['Week'] = 
df_sales['Date'].dt.isocalendar().week\n", + "weekly_analysis = df_sales.groupby('Week').agg({\n", + " 'Sales': ['sum', 'mean', 'count'],\n", + " 'Product': lambda x: x.mode().iloc[0] if not x.mode().empty else 'None', # Most common product\n", + " 'Salesperson': 'nunique'\n", + "}).round(2)\n", + "print(weekly_analysis.head(10))\n", + "\n", + "print(\"\\nDay of week analysis:\")\n", + "df_sales['DayOfWeek'] = df_sales['Date'].dt.day_name()\n", + "day_analysis = df_sales.groupby('DayOfWeek')['Sales'].agg(['count', 'mean', 'sum']).round(2)\n", + "# Reorder by weekday\n", + "day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']\n", + "day_analysis = day_analysis.reindex([day for day in day_order if day in day_analysis.index])\n", + "print(day_analysis)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Performance Considerations\n", + "\n", + "Tips for efficient groupby operations." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Large dataset size: 2000 rows\n", + "Multiple groupby calls: 0.0016 seconds\n", + "Single groupby with agg: 0.0007 seconds\n", + "Efficiency gain: 2.44x faster\n", + "\n", + "Results are equivalent: True\n" + ] + } + ], + "source": [ + "# Efficient groupby operations\n", + "import time\n", + "\n", + "# Create larger dataset for timing comparison\n", + "large_df = pd.concat([df_sales] * 10, ignore_index=True)\n", + "print(f\"Large dataset size: {len(large_df)} rows\")\n", + "\n", + "# Method 1: Multiple separate groupby calls (less efficient)\n", + "start_time = time.time()\n", + "result1_sum = large_df.groupby('Product')['Sales'].sum()\n", + "result1_mean = large_df.groupby('Product')['Sales'].mean()\n", + "result1_count = large_df.groupby('Product')['Sales'].count()\n", + "time1 = time.time() - start_time\n", + "\n", + "# Method 2: Single groupby with agg (more 
efficient)\n", + "start_time = time.time()\n", + "result2 = large_df.groupby('Product')['Sales'].agg(['sum', 'mean', 'count'])\n", + "time2 = time.time() - start_time\n", + "\n", + "print(f\"Multiple groupby calls: {time1:.4f} seconds\")\n", + "print(f\"Single groupby with agg: {time2:.4f} seconds\")\n", + "print(f\"Efficiency gain: {time1/time2:.2f}x faster\")\n", + "\n", + "# Verify results are the same\n", + "print(f\"\\nResults are equivalent: {result1_sum.equals(result2['sum'])}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply your grouping and aggregation skills:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Sales Performance Analysis\n", + "# Create a comprehensive sales performance report that includes:\n", + "# - Total and average sales by salesperson and region\n", + "# - Commission earned by each salesperson\n", + "# - Performance ranking within each region\n", + "# - Identify top and bottom performers\n", + "\n", + "# Your code here:\n", + "def sales_performance_report(df):\n", + " \"\"\"Generate comprehensive sales performance report\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# sales_performance_report(df_sales)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Product Analysis\n", + "# Analyze product performance including:\n", + "# - Which products are most/least popular (by quantity and sales)\n", + "# - Seasonal trends for each product\n", + "# - Regional preferences for different products\n", + "# - Price consistency across regions\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Custom Business Metrics\n", + "# Create custom aggregation functions to calculate:\n", + "# - Customer 
acquisition cost (if you have marketing spend data)\n", + "# - Sales velocity (sales per day) for each product\n", + "# - Market share by region\n", + "# - Performance consistency score\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **GroupBy Basics**: `.groupby()` splits data into groups based on categorical variables\n", + "2. **Aggregation Functions**: Use built-in functions (`sum`, `mean`, `count`) or custom functions\n", + "3. **Multiple Aggregations**: Use `.agg()` with lists or dictionaries for multiple operations\n", + "4. **Hierarchical Indexing**: Multiple group columns create hierarchical indices\n", + "5. **Transform vs Apply**: `.transform()` preserves original size, `.apply()` can return different structures\n", + "6. **Filtering Groups**: Use `.filter()` to remove entire groups based on conditions\n", + "7. **Performance**: Single `.agg()` calls are more efficient than multiple `.groupby()` operations\n", + "\n", + "## Common Patterns\n", + "\n", + "```python\n", + "# Basic aggregation\n", + "df.groupby('column')['value'].sum()\n", + "\n", + "# Multiple aggregations\n", + "df.groupby('column')['value'].agg(['sum', 'mean', 'count'])\n", + "\n", + "# Multiple columns and aggregations\n", + "df.groupby('group_col').agg({\n", + " 'col1': ['sum', 'mean'],\n", + " 'col2': 'count'\n", + "})\n", + "\n", + "# Custom aggregation\n", + "df.groupby('column')['value'].agg(lambda x: x.max() - x.min())\n", + "\n", + "# Transform for group statistics\n", + "df['group_mean'] = df.groupby('group')['value'].transform('mean')\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/05_adding_modifying_columns.ipynb b/Session_01/PandasDataFrame-exmples/05_adding_modifying_columns.ipynb new file mode 100755 index 0000000..a857628 --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/05_adding_modifying_columns.ipynb @@ -0,0 +1,733 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 5: Adding and Modifying Columns\n", + "\n", + "## Learning Objectives\n", + "- Learn different methods to add new columns to DataFrames\n", + "- Master conditional column creation using various techniques\n", + "- Understand how to modify existing columns\n", + "- Practice with calculated fields and derived columns\n", + "- Explore data type conversions and transformations\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-4\n", + "- Understanding of basic Python operations and functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime, timedelta\n", + "\n", + "# Create sample dataset\n", + "np.random.seed(42)\n", + "n_records = 150\n", + "\n", + "sales_data = {\n", + " 'Date': pd.date_range('2024-01-01', periods=n_records, freq='D'),\n", + " 'Product': np.random.choice(['Laptop', 'Phone', 'Tablet', 'Monitor'], n_records),\n", + " 'Sales': np.random.normal(1000, 200, n_records).astype(int),\n", + " 'Quantity': np.random.randint(1, 8, n_records),\n", + " 'Region': np.random.choice(['North', 'South', 'East', 'West'], n_records),\n", + " 'Salesperson': np.random.choice(['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'], n_records),\n", + " 'Customer_Type': np.random.choice(['New', 'Returning', 'VIP'], n_records, p=[0.3, 0.6, 0.1])\n", + "}\n", + "\n", + "df_sales = 
pd.DataFrame(sales_data)\n", + "df_sales['Sales'] = np.abs(df_sales['Sales']) # Ensure positive values\n", + "\n", + "print(\"Original dataset:\")\n", + "print(f\"Shape: {df_sales.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_sales.head())\n", + "print(\"\\nData types:\")\n", + "print(df_sales.dtypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Basic Column Addition\n", + "\n", + "Simple methods to add new columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 1: Direct assignment\n", + "df_modified = df_sales.copy()\n", + "\n", + "# Add simple calculated columns\n", + "df_modified['Revenue'] = df_modified['Sales'] * df_modified['Quantity']\n", + "df_modified['Commission_10%'] = df_modified['Sales'] * 0.10\n", + "df_modified['Sales_per_Unit'] = df_modified['Sales'] / df_modified['Quantity']\n", + "\n", + "print(\"New calculated columns:\")\n", + "print(df_modified[['Sales', 'Quantity', 'Revenue', 'Commission_10%', 'Sales_per_Unit']].head())\n", + "\n", + "# Add constant value column\n", + "df_modified['Year'] = 2024\n", + "df_modified['Currency'] = 'USD'\n", + "df_modified['Department'] = 'Sales'\n", + "\n", + "print(\"\\nConstant value columns added:\")\n", + "print(df_modified[['Year', 'Currency', 'Department']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 2: Using assign() method (more functional approach)\n", + "df_assigned = df_sales.assign(\n", + " Revenue=lambda x: x['Sales'] * x['Quantity'],\n", + " Commission_Rate=0.08,\n", + " Commission_Amount=lambda x: x['Sales'] * 0.08,\n", + " Sales_Squared=lambda x: x['Sales'] ** 2,\n", + " Is_High_Volume=lambda x: x['Quantity'] > 5\n", + ")\n", + "\n", + "print(\"Using assign() method:\")\n", + "print(df_assigned[['Sales', 'Quantity', 'Revenue', 'Commission_Amount', 'Is_High_Volume']].head())\n", + 
"\n", + "print(f\"\\nOriginal shape: {df_sales.shape}\")\n", + "print(f\"Modified shape: {df_assigned.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 3: Using insert() for specific positioning\n", + "df_insert = df_sales.copy()\n", + "\n", + "# Insert column at specific position (after 'Sales')\n", + "sales_index = df_insert.columns.get_loc('Sales')\n", + "df_insert.insert(sales_index + 1, 'Sales_Tax', df_insert['Sales'] * 0.08)\n", + "df_insert.insert(sales_index + 2, 'Total_with_Tax', df_insert['Sales'] + df_insert['Sales_Tax'])\n", + "\n", + "print(\"Using insert() for positioned columns:\")\n", + "print(df_insert[['Product', 'Sales', 'Sales_Tax', 'Total_with_Tax', 'Quantity']].head())\n", + "print(f\"\\nColumn order: {list(df_insert.columns)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Conditional Column Creation\n", + "\n", + "Create columns based on conditions and business logic." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 1: Using np.where() for simple conditions\n", + "df_conditional = df_sales.copy()\n", + "\n", + "# Simple binary conditions\n", + "df_conditional['High_Sales'] = np.where(df_conditional['Sales'] > 1000, 'Yes', 'No')\n", + "df_conditional['Weekend'] = np.where(df_conditional['Date'].dt.dayofweek >= 5, 'Weekend', 'Weekday')\n", + "df_conditional['Bulk_Order'] = np.where(df_conditional['Quantity'] >= 5, 'Bulk', 'Regular')\n", + "\n", + "print(\"Simple conditional columns:\")\n", + "print(df_conditional[['Sales', 'High_Sales', 'Date', 'Weekend', 'Quantity', 'Bulk_Order']].head())\n", + "\n", + "# Nested conditions\n", + "df_conditional['Sales_Category'] = np.where(df_conditional['Sales'] > 1200, 'High',\n", + " np.where(df_conditional['Sales'] > 800, 'Medium', 'Low'))\n", + "\n", + "print(\"\\nNested conditions:\")\n", + "print(df_conditional[['Sales', 'Sales_Category']].head(10))\n", + "print(\"\\nCategory distribution:\")\n", + "print(df_conditional['Sales_Category'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 2: Using pd.cut() for binning numerical data\n", + "df_conditional['Sales_Tier'] = pd.cut(df_conditional['Sales'], \n", + " bins=[0, 500, 800, 1200, float('inf')],\n", + " labels=['Entry', 'Standard', 'Premium', 'Luxury'])\n", + "\n", + "print(\"Using pd.cut() for binning:\")\n", + "print(df_conditional[['Sales', 'Sales_Tier']].head(10))\n", + "print(\"\\nTier distribution:\")\n", + "print(df_conditional['Sales_Tier'].value_counts())\n", + "\n", + "# Using pd.qcut() for quantile-based binning\n", + "df_conditional['Sales_Quintile'] = pd.qcut(df_conditional['Sales'], \n", + " q=5, \n", + " labels=['Bottom 20%', 'Low 20%', 'Mid 20%', 'High 20%', 'Top 20%'])\n", + "\n", + "print(\"\\nUsing pd.qcut() for quantile binning:\")\n", + 
"print(df_conditional['Sales_Quintile'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 3: Using np.select() for multiple conditions\n", + "# Define conditions and choices\n", + "conditions = [\n", + " (df_conditional['Sales'] >= 1200) & (df_conditional['Quantity'] >= 5),\n", + " (df_conditional['Sales'] >= 1000) & (df_conditional['Customer_Type'] == 'VIP'),\n", + " (df_conditional['Sales'] >= 800) & (df_conditional['Region'] == 'North'),\n", + " df_conditional['Customer_Type'] == 'New'\n", + "]\n", + "\n", + "choices = ['Premium Deal', 'VIP Sale', 'North Preferred', 'New Customer']\n", + "default = 'Standard'\n", + "\n", + "df_conditional['Deal_Type'] = np.select(conditions, choices, default=default)\n", + "\n", + "print(\"Using np.select() for complex conditions:\")\n", + "print(df_conditional[['Sales', 'Quantity', 'Customer_Type', 'Region', 'Deal_Type']].head(10))\n", + "print(\"\\nDeal type distribution:\")\n", + "print(df_conditional['Deal_Type'].value_counts())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Using Apply and Lambda Functions\n", + "\n", + "Create complex calculated columns using custom functions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple lambda functions\n", + "df_apply = df_sales.copy()\n", + "\n", + "# Single column transformations\n", + "df_apply['Sales_Log'] = df_apply['Sales'].apply(lambda x: np.log(x))\n", + "df_apply['Product_Length'] = df_apply['Product'].apply(lambda x: len(x))\n", + "df_apply['Days_Since_Start'] = df_apply['Date'].apply(lambda x: (x - df_apply['Date'].min()).days)\n", + "\n", + "print(\"Simple lambda transformations:\")\n", + "print(df_apply[['Sales', 'Sales_Log', 'Product', 'Product_Length', 'Days_Since_Start']].head())\n", + "\n", + "# Multiple column operations using lambda\n", + "df_apply['Efficiency_Score'] = df_apply.apply(\n", + " lambda row: (row['Sales'] * row['Quantity']) / (row['Days_Since_Start'] + 1), \n", + " axis=1\n", + ")\n", + "\n", + "print(\"\\nMultiple column lambda:\")\n", + "print(df_apply[['Sales', 'Quantity', 'Days_Since_Start', 'Efficiency_Score']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Custom functions for complex business logic\n", + "def calculate_commission(row):\n", + " \"\"\"Calculate commission based on complex business rules\"\"\"\n", + " base_rate = 0.05\n", + " \n", + " # VIP customers get higher commission\n", + " if row['Customer_Type'] == 'VIP':\n", + " base_rate += 0.02\n", + " \n", + " # High quantity orders get bonus\n", + " if row['Quantity'] >= 5:\n", + " base_rate += 0.01\n", + " \n", + " # Regional multipliers\n", + " region_multipliers = {'North': 1.2, 'South': 1.0, 'East': 1.1, 'West': 0.9}\n", + " multiplier = region_multipliers.get(row['Region'], 1.0)\n", + " \n", + " return row['Sales'] * base_rate * multiplier\n", + "\n", + "def performance_rating(row):\n", + " \"\"\"Calculate performance rating based on multiple factors\"\"\"\n", + " score = 0\n", + " \n", + " # Sales performance (40% weight)\n", + " if 
row['Sales'] > 1200:\n", + " score += 40\n", + " elif row['Sales'] > 800:\n", + " score += 30\n", + " else:\n", + " score += 20\n", + " \n", + " # Quantity performance (30% weight)\n", + " if row['Quantity'] >= 6:\n", + " score += 30\n", + " elif row['Quantity'] >= 4:\n", + " score += 20\n", + " else:\n", + " score += 10\n", + " \n", + " # Customer type bonus (30% weight)\n", + " customer_bonus = {'VIP': 30, 'Returning': 20, 'New': 15}\n", + " score += customer_bonus.get(row['Customer_Type'], 0)\n", + " \n", + " # Convert to letter grade\n", + " if score >= 85:\n", + " return 'A'\n", + " elif score >= 70:\n", + " return 'B'\n", + " elif score >= 55:\n", + " return 'C'\n", + " else:\n", + " return 'D'\n", + "\n", + "# Apply custom functions\n", + "df_apply['Commission'] = df_apply.apply(calculate_commission, axis=1)\n", + "df_apply['Performance_Rating'] = df_apply.apply(performance_rating, axis=1)\n", + "\n", + "print(\"Custom function results:\")\n", + "print(df_apply[['Sales', 'Quantity', 'Customer_Type', 'Region', 'Commission', 'Performance_Rating']].head())\n", + "\n", + "print(\"\\nPerformance rating distribution:\")\n", + "print(df_apply['Performance_Rating'].value_counts().sort_index())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Date and Time Derived Columns\n", + "\n", + "Extract useful information from datetime columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract date components\n", + "df_dates = df_sales.copy()\n", + "\n", + "# Basic date components\n", + "df_dates['Year'] = df_dates['Date'].dt.year\n", + "df_dates['Month'] = df_dates['Date'].dt.month\n", + "df_dates['Day'] = df_dates['Date'].dt.day\n", + "df_dates['DayOfWeek'] = df_dates['Date'].dt.dayofweek # 0=Monday, 6=Sunday\n", + "df_dates['DayName'] = df_dates['Date'].dt.day_name()\n", + "df_dates['MonthName'] = df_dates['Date'].dt.month_name()\n", + "\n", + "print(\"Basic date components:\")\n", + "print(df_dates[['Date', 'Year', 'Month', 'Day', 'DayOfWeek', 'DayName', 'MonthName']].head())\n", + "\n", + "# Business-relevant date features\n", + "df_dates['Quarter'] = df_dates['Date'].dt.quarter\n", + "df_dates['Week'] = df_dates['Date'].dt.isocalendar().week\n", + "df_dates['DayOfYear'] = df_dates['Date'].dt.dayofyear\n", + "df_dates['IsWeekend'] = df_dates['Date'].dt.dayofweek >= 5\n", + "df_dates['IsMonthStart'] = df_dates['Date'].dt.is_month_start\n", + "df_dates['IsMonthEnd'] = df_dates['Date'].dt.is_month_end\n", + "\n", + "print(\"\\nBusiness date features:\")\n", + "print(df_dates[['Date', 'Quarter', 'Week', 'IsWeekend', 'IsMonthStart', 'IsMonthEnd']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Time-based calculations\n", + "start_date = df_dates['Date'].min()\n", + "df_dates['Days_Since_Start'] = (df_dates['Date'] - start_date).dt.days\n", + "df_dates['Weeks_Since_Start'] = df_dates['Days_Since_Start'] // 7\n", + "\n", + "# Create season column\n", + "def get_season(month):\n", + " if month in [12, 1, 2]:\n", + " return 'Winter'\n", + " elif month in [3, 4, 5]:\n", + " return 'Spring'\n", + " elif month in [6, 7, 8]:\n", + " return 'Summer'\n", + " else:\n", + " return 'Fall'\n", + "\n", + "df_dates['Season'] = df_dates['Month'].apply(get_season)\n", + 
"\n", + "# Business day calculations\n", + "df_dates['IsBusinessDay'] = df_dates['Date'].dt.dayofweek < 5\n", + "df_dates['BusinessDaysSinceStart'] = df_dates.apply(\n", + " lambda row: np.busday_count(start_date.date(), row['Date'].date()), axis=1\n", + ")\n", + "\n", + "print(\"Time-based calculations:\")\n", + "print(df_dates[['Date', 'Days_Since_Start', 'Weeks_Since_Start', 'Season', \n", + " 'IsBusinessDay', 'BusinessDaysSinceStart']].head())\n", + "\n", + "print(\"\\nSeason distribution:\")\n", + "print(df_dates['Season'].value_counts())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Text and String Manipulations\n", + "\n", + "Create columns based on string operations and text processing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# String manipulations\n", + "df_text = df_sales.copy()\n", + "\n", + "# Basic string operations\n", + "df_text['Product_Upper'] = df_text['Product'].str.upper()\n", + "df_text['Product_Lower'] = df_text['Product'].str.lower()\n", + "df_text['Product_Length'] = df_text['Product'].str.len()\n", + "df_text['Product_First_Char'] = df_text['Product'].str[0]\n", + "df_text['Product_Last_Three'] = df_text['Product'].str[-3:]\n", + "\n", + "print(\"Basic string operations:\")\n", + "print(df_text[['Product', 'Product_Upper', 'Product_Lower', 'Product_Length', \n", + " 'Product_First_Char', 'Product_Last_Three']].head())\n", + "\n", + "# Text categorization\n", + "df_text['Product_Category'] = df_text['Product'].apply(lambda x: \n", + " 'Computer' if x in ['Laptop', 'Monitor'] else\n", + " 'Mobile' if x in ['Phone', 'Tablet'] else\n", + " 'Other'\n", + ")\n", + "\n", + "# Check for patterns\n", + "df_text['Has_Letter_A'] = df_text['Product'].str.contains('a', case=False)\n", + "df_text['Starts_With_L'] = df_text['Product'].str.startswith('L')\n", + "df_text['Ends_With_E'] = df_text['Product'].str.endswith('e')\n", + "\n", + 
"print(\"\\nText patterns and categorization:\")\n", + "print(df_text[['Product', 'Product_Category', 'Has_Letter_A', 'Starts_With_L', 'Ends_With_E']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create formatted text columns\n", + "df_text['Sales_Formatted'] = df_text['Sales'].apply(lambda x: f\"${x:,.2f}\")\n", + "df_text['Transaction_ID'] = df_text.apply(\n", + " lambda row: f\"{row['Region'][:1]}{row['Product'][:3].upper()}{row.name:04d}\", axis=1\n", + ")\n", + "\n", + "# Create summary descriptions\n", + "df_text['Transaction_Summary'] = df_text.apply(\n", + " lambda row: f\"{row['Salesperson']} sold {row['Quantity']} {row['Product']}(s) \"\n", + " f\"for {row['Sales_Formatted']} in {row['Region']} region\", \n", + " axis=1\n", + ")\n", + "\n", + "print(\"Formatted text columns:\")\n", + "print(df_text[['Sales_Formatted', 'Transaction_ID']].head())\n", + "print(\"\\nTransaction summaries:\")\n", + "for i, summary in enumerate(df_text['Transaction_Summary'].head(3)):\n", + " print(f\"{i+1}. {summary}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Working with Categorical Data\n", + "\n", + "Optimize memory usage and enable category-specific operations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert to categorical data types\n", + "df_categorical = df_sales.copy()\n", + "\n", + "# Check memory usage before\n", + "print(\"Memory usage before categorical conversion:\")\n", + "print(df_categorical.memory_usage(deep=True))\n", + "\n", + "# Convert string columns to categorical\n", + "categorical_columns = ['Product', 'Region', 'Salesperson', 'Customer_Type']\n", + "for col in categorical_columns:\n", + " df_categorical[col] = df_categorical[col].astype('category')\n", + "\n", + "print(\"\\nMemory usage after categorical conversion:\")\n", + "print(df_categorical.memory_usage(deep=True))\n", + "\n", + "print(\"\\nData types after conversion:\")\n", + "print(df_categorical.dtypes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Working with ordered categories\n", + "# Create ordered categorical for sales performance\n", + "performance_categories = ['Poor', 'Fair', 'Good', 'Excellent']\n", + "df_categorical['Performance_Level'] = pd.cut(\n", + " df_categorical['Sales'],\n", + " bins=[0, 700, 900, 1200, float('inf')],\n", + " labels=performance_categories,\n", + " ordered=True\n", + ")\n", + "\n", + "print(\"Ordered categorical data:\")\n", + "print(df_categorical['Performance_Level'].head(10))\n", + "print(\"\\nCategory info:\")\n", + "print(df_categorical['Performance_Level'].cat.categories)\n", + "print(f\"Is ordered: {df_categorical['Performance_Level'].cat.ordered}\")\n", + "\n", + "# Categorical operations\n", + "print(\"\\nPerformance level distribution:\")\n", + "print(df_categorical['Performance_Level'].value_counts().sort_index())\n", + "\n", + "# Add new category\n", + "df_categorical['Performance_Level'] = df_categorical['Performance_Level'].cat.add_categories(['Outstanding'])\n", + "print(f\"\\nCategories after adding 'Outstanding': 
{df_categorical['Performance_Level'].cat.categories}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Mathematical and Statistical Transformations\n", + "\n", + "Create columns using mathematical functions and statistical transformations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Mathematical transformations\n", + "df_math = df_sales.copy()\n", + "\n", + "# Common mathematical transformations\n", + "df_math['Sales_Log'] = np.log(df_math['Sales'])\n", + "df_math['Sales_Sqrt'] = np.sqrt(df_math['Sales'])\n", + "df_math['Sales_Squared'] = df_math['Sales'] ** 2\n", + "df_math['Sales_Reciprocal'] = 1 / df_math['Sales']\n", + "\n", + "print(\"Mathematical transformations:\")\n", + "print(df_math[['Sales', 'Sales_Log', 'Sales_Sqrt', 'Sales_Squared', 'Sales_Reciprocal']].head())\n", + "\n", + "# Statistical standardization\n", + "df_math['Sales_Z_Score'] = (df_math['Sales'] - df_math['Sales'].mean()) / df_math['Sales'].std()\n", + "df_math['Sales_Min_Max_Scaled'] = (df_math['Sales'] - df_math['Sales'].min()) / (df_math['Sales'].max() - df_math['Sales'].min())\n", + "\n", + "# Rolling statistics\n", + "df_math = df_math.sort_values('Date')\n", + "df_math['Sales_Rolling_7_Mean'] = df_math['Sales'].rolling(window=7, min_periods=1).mean()\n", + "df_math['Sales_Rolling_7_Std'] = df_math['Sales'].rolling(window=7, min_periods=1).std()\n", + "df_math['Sales_Cumulative_Sum'] = df_math['Sales'].cumsum()\n", + "\n", + "print(\"\\nStatistical transformations:\")\n", + "print(df_math[['Sales', 'Sales_Z_Score', 'Sales_Min_Max_Scaled', \n", + " 'Sales_Rolling_7_Mean', 'Sales_Cumulative_Sum']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rank and percentile columns\n", + "df_math['Sales_Rank'] = df_math['Sales'].rank(ascending=False)\n", + "df_math['Sales_Percentile'] = 
df_math['Sales'].rank(pct=True) * 100\n", + "df_math['Sales_Rank_by_Region'] = df_math.groupby('Region')['Sales'].rank(ascending=False)\n", + "\n", + "# Binning and discretization\n", + "df_math['Sales_Decile'] = pd.qcut(df_math['Sales'], q=10, labels=range(1, 11))\n", + "df_math['Sales_Tertile'] = pd.qcut(df_math['Sales'], q=3, labels=['Low', 'Medium', 'High'])\n", + "\n", + "print(\"Ranking and binning:\")\n", + "print(df_math[['Sales', 'Sales_Rank', 'Sales_Percentile', 'Sales_Rank_by_Region', \n", + " 'Sales_Decile', 'Sales_Tertile']].head(10))\n", + "\n", + "print(\"\\nDecile distribution:\")\n", + "print(df_math['Sales_Decile'].value_counts().sort_index())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply your column creation and modification skills:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Customer Segmentation\n", + "# Create a comprehensive customer segmentation system:\n", + "# - Combine purchase behavior, frequency, and value\n", + "# - Create RFM-like scores (Recency, Frequency, Monetary)\n", + "# - Assign customer segments (e.g., Champion, Loyal, At Risk, etc.)\n", + "\n", + "def create_customer_segmentation(df):\n", + " \"\"\"Create customer segmentation based on purchase patterns\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# segmented_df = create_customer_segmentation(df_sales)\n", + "# print(segmented_df[['Customer_Type', 'Sales', 'Frequency_Score', 'Monetary_Score', 'Segment']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Performance Metrics Dashboard\n", + "# Create a comprehensive set of KPI columns:\n", + "# - Sales efficiency metrics\n", + "# - Trend indicators (growth rates, momentum)\n", + "# - Comparative metrics (vs. average, vs. 
target)\n", + "# - Alert flags for unusual patterns\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Feature Engineering for ML\n", + "# Create features that could be useful for machine learning:\n", + "# - Interaction features (product of two variables)\n", + "# - Polynomial features\n", + "# - Time-based features (seasonality, trends)\n", + "# - Lag features (previous period values)\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Column Assignment**: Use direct assignment (`df['col'] = value`) for simple cases\n", + "2. **Assign Method**: Use `.assign()` for functional programming style and method chaining\n", + "3. **Conditional Logic**: Combine `np.where()`, `pd.cut()`, `pd.qcut()`, and `np.select()` for complex conditions\n", + "4. **Apply Functions**: Use `.apply()` with lambda or custom functions for complex transformations\n", + "5. **Date Features**: Extract meaningful components from datetime columns\n", + "6. **String Operations**: Leverage `.str` accessor for text manipulations\n", + "7. **Categorical Data**: Convert to categories for memory efficiency and special operations\n", + "8. **Mathematical Transformations**: Apply statistical and mathematical functions for data preprocessing\n", + "\n", + "## Performance Tips\n", + "\n", + "1. **Vectorized Operations**: Prefer pandas/numpy operations over loops\n", + "2. **Categorical Types**: Use categorical data for repeated string values\n", + "3. **Memory Management**: Monitor memory usage when creating many new columns\n", + "4. **Method Chaining**: Use `.assign()` for readable method chains\n", + "5. 
**Avoid apply() When Possible**: Use vectorized operations instead of `.apply()` for better performance\n", + "\n", + "## Common Patterns\n", + "\n", + "```python\n", + "# Simple calculation\n", + "df['new_col'] = df['col1'] * df['col2']\n", + "\n", + "# Conditional column\n", + "df['category'] = np.where(df['value'] > threshold, 'High', 'Low')\n", + "\n", + "# Apply custom function\n", + "df['result'] = df.apply(custom_function, axis=1)\n", + "\n", + "# Date features\n", + "df['month'] = df['date'].dt.month\n", + "\n", + "# String operations\n", + "df['upper'] = df['text'].str.upper()\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/06_handling_missing_data.ipynb b/Session_01/PandasDataFrame-exmples/06_handling_missing_data.ipynb new file mode 100755 index 0000000..180a3c8 --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/06_handling_missing_data.ipynb @@ -0,0 +1,916 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 6: Handling Missing Data\n", + "\n", + "## Learning Objectives\n", + "- Understand different types of missing data and their implications\n", + "- Master techniques for detecting and analyzing missing values\n", + "- Learn various strategies for handling missing data\n", + "- Practice imputation methods and their trade-offs\n", + "- Develop best practices for missing data management\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-5\n", + "- Understanding of basic statistical concepts\n", + "- Familiarity with data quality principles" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from datetime import datetime, timedelta\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display options\n", + "pd.set_option('display.max_columns', None)\n", + "plt.style.use('seaborn-v0_8')\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Dataset with Missing Values\n", + "\n", + "Let's create a realistic dataset with different patterns of missing data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create comprehensive dataset with various missing data patterns\n", + "np.random.seed(42)\n", + "n_records = 500\n", + "\n", + "# Base data\n", + "data = {\n", + " 'customer_id': range(1, n_records + 1),\n", + " 'age': np.random.normal(35, 12, n_records).astype(int),\n", + " 'income': np.random.normal(50000, 15000, n_records),\n", + " 'education_years': np.random.normal(14, 3, n_records),\n", + " 'purchase_amount': np.random.normal(200, 50, n_records),\n", + " 'satisfaction_score': np.random.randint(1, 6, n_records),\n", + " 'region': np.random.choice(['North', 'South', 'East', 'West'], n_records),\n", + " 'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Home'], n_records),\n", + " 'signup_date': pd.date_range('2023-01-01', periods=n_records, freq='D'),\n", + " 'last_purchase_date': pd.date_range('2023-01-01', periods=n_records, freq='D') + pd.Timedelta(days=30)\n", + "}\n", + "\n", + "df_complete = pd.DataFrame(data)\n", + "\n", + "# Ensure positive values where appropriate\n", + "df_complete['age'] = np.abs(df_complete['age'])\n", + "df_complete['income'] = 
np.abs(df_complete['income'])\n", + "df_complete['education_years'] = np.clip(df_complete['education_years'], 6, 20)\n", + "df_complete['purchase_amount'] = np.abs(df_complete['purchase_amount'])\n", + "\n", + "print(\"Complete dataset created:\")\n", + "print(f\"Shape: {df_complete.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_complete.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Introduce different patterns of missing data\n", + "df_missing = df_complete.copy()\n", + "\n", + "# 1. Missing Completely at Random (MCAR) - income data\n", + "# Randomly missing 15% of income values\n", + "mcar_indices = np.random.choice(df_missing.index, size=int(0.15 * len(df_missing)), replace=False)\n", + "df_missing.loc[mcar_indices, 'income'] = np.nan\n", + "\n", + "# 2. Missing at Random (MAR) - education years missing based on age\n", + "# Older people less likely to report education\n", + "older_customers = df_missing['age'] > 60\n", + "older_indices = df_missing[older_customers].index\n", + "education_missing = np.random.choice(older_indices, size=int(0.4 * len(older_indices)), replace=False)\n", + "df_missing.loc[education_missing, 'education_years'] = np.nan\n", + "\n", + "# 3. Missing Not at Random (MNAR) - satisfaction scores\n", + "# Unsatisfied customers less likely to provide ratings\n", + "low_satisfaction = df_missing['satisfaction_score'] <= 2\n", + "low_sat_indices = df_missing[low_satisfaction].index\n", + "satisfaction_missing = np.random.choice(low_sat_indices, size=int(0.6 * len(low_sat_indices)), replace=False)\n", + "df_missing.loc[satisfaction_missing, 'satisfaction_score'] = np.nan\n", + "\n", + "# 4. 
Systematic missing - last purchase date for new customers\n", + "# New customers (signed up recently) haven't made purchases yet\n", + "recent_signups = df_missing['signup_date'] > '2023-11-01'\n", + "df_missing.loc[recent_signups, 'last_purchase_date'] = pd.NaT\n", + "\n", + "# 5. Random missing in other columns\n", + "# Purchase amount - 10% missing\n", + "purchase_missing = np.random.choice(df_missing.index, size=int(0.10 * len(df_missing)), replace=False)\n", + "df_missing.loc[purchase_missing, 'purchase_amount'] = np.nan\n", + "\n", + "print(\"Missing data patterns introduced:\")\n", + "print(f\"Dataset shape: {df_missing.shape}\")\n", + "print(\"\\nMissing value counts:\")\n", + "missing_summary = df_missing.isnull().sum()\n", + "missing_summary = missing_summary[missing_summary > 0]\n", + "print(missing_summary)\n", + "\n", + "print(\"\\nMissing value percentages:\")\n", + "missing_pct = (df_missing.isnull().sum() / len(df_missing) * 100).round(2)\n", + "missing_pct = missing_pct[missing_pct > 0]\n", + "print(missing_pct)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Detecting and Analyzing Missing Data\n", + "\n", + "Comprehensive techniques for understanding missing data patterns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def analyze_missing_data(df):\n", + " \"\"\"Comprehensive missing data analysis\"\"\"\n", + " print(\"=== MISSING DATA ANALYSIS ===\")\n", + " \n", + " # Basic missing data statistics\n", + " total_cells = df.size\n", + " total_missing = df.isnull().sum().sum()\n", + " print(f\"Total cells: {total_cells:,}\")\n", + " print(f\"Missing cells: {total_missing:,} ({total_missing/total_cells*100:.2f}%)\")\n", + " \n", + " # Missing data by column\n", + " missing_by_column = pd.DataFrame({\n", + " 'Missing_Count': df.isnull().sum(),\n", + " 'Missing_Percentage': (df.isnull().sum() / len(df)) * 100,\n", + " 'Data_Type': df.dtypes\n", + " })\n", + " missing_by_column = missing_by_column[missing_by_column['Missing_Count'] > 0]\n", + " missing_by_column = missing_by_column.sort_values('Missing_Percentage', ascending=False)\n", + " \n", + " print(\"\\n--- Missing Data by Column ---\")\n", + " print(missing_by_column.round(2))\n", + " \n", + " # Missing data patterns\n", + " print(\"\\n--- Missing Data Patterns ---\")\n", + " missing_patterns = df.isnull().value_counts().head(10)\n", + " print(\"Top 10 missing patterns (True = Missing):\")\n", + " for pattern, count in missing_patterns.items():\n", + " percentage = (count / len(df)) * 100\n", + " print(f\"{count:4d} rows ({percentage:5.1f}%): {dict(zip(df.columns, pattern))}\")\n", + " \n", + " return missing_by_column\n", + "\n", + "# Analyze missing data\n", + "missing_analysis = analyze_missing_data(df_missing)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize missing data patterns\n", + "def visualize_missing_data(df):\n", + " \"\"\"Create visualizations for missing data patterns\"\"\"\n", + " fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n", + " \n", + " # 1. 
Missing data heatmap\n", + " missing_mask = df.isnull()\n", + " sns.heatmap(missing_mask.iloc[:100], \n", + " yticklabels=False, \n", + " cbar=True, \n", + " cmap='viridis',\n", + " ax=axes[0, 0])\n", + " axes[0, 0].set_title('Missing Data Heatmap (First 100 rows)')\n", + " \n", + " # 2. Missing data by column\n", + " missing_counts = df.isnull().sum()\n", + " missing_counts = missing_counts[missing_counts > 0]\n", + " missing_counts.plot(kind='bar', ax=axes[0, 1], color='skyblue')\n", + " axes[0, 1].set_title('Missing Values by Column')\n", + " axes[0, 1].set_ylabel('Count')\n", + " axes[0, 1].tick_params(axis='x', rotation=45)\n", + " \n", + " # 3. Missing data correlation\n", + " missing_corr = df.isnull().corr()\n", + " sns.heatmap(missing_corr, annot=True, cmap='coolwarm', center=0, ax=axes[1, 0])\n", + " axes[1, 0].set_title('Missing Data Correlation')\n", + " \n", + " # 4. Missing data by row\n", + " missing_per_row = df.isnull().sum(axis=1)\n", + " missing_per_row.hist(bins=range(len(df.columns) + 2), ax=axes[1, 1], alpha=0.7, color='orange')\n", + " axes[1, 1].set_title('Distribution of Missing Values per Row')\n", + " axes[1, 1].set_xlabel('Number of Missing Values')\n", + " axes[1, 1].set_ylabel('Number of Rows')\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "# Visualize missing patterns\n", + "visualize_missing_data(df_missing)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Analyze missing data relationships\n", + "def analyze_missing_relationships(df):\n", + " \"\"\"Analyze relationships between missing data and other variables\"\"\"\n", + " print(\"=== MISSING DATA RELATIONSHIPS ===\")\n", + " \n", + " # Example: Relationship between age and missing education\n", + " if 'age' in df.columns and 'education_years' in df.columns:\n", + " print(\"\\n--- Age vs Missing Education ---\")\n", + " education_missing = df['education_years'].isnull()\n", + " age_stats = 
df.groupby(education_missing)['age'].agg(['mean', 'median', 'std']).round(2)\n", + " age_stats.index = ['Education Present', 'Education Missing']\n", + " print(age_stats)\n", + " \n", + " # Example: Missing satisfaction by purchase amount\n", + " if 'satisfaction_score' in df.columns and 'purchase_amount' in df.columns:\n", + " print(\"\\n--- Purchase Amount vs Missing Satisfaction ---\")\n", + " satisfaction_missing = df['satisfaction_score'].isnull()\n", + " purchase_stats = df.groupby(satisfaction_missing)['purchase_amount'].agg(['mean', 'median', 'count']).round(2)\n", + " purchase_stats.index = ['Satisfaction Present', 'Satisfaction Missing']\n", + " print(purchase_stats)\n", + " \n", + " # Missing data by categorical variables\n", + " if 'region' in df.columns:\n", + " print(\"\\n--- Missing Data by Region ---\")\n", + " region_missing = df.groupby('region').apply(lambda x: x.isnull().sum())\n", + " print(region_missing[region_missing.sum(axis=1) > 0])\n", + "\n", + "# Analyze relationships\n", + "analyze_missing_relationships(df_missing)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Basic Missing Data Handling\n", + "\n", + "Fundamental techniques for dealing with missing values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 1: Dropping missing values\n", + "print(\"=== DROPPING MISSING VALUES ===\")\n", + "\n", + "# Drop rows with any missing values\n", + "df_drop_any = df_missing.dropna()\n", + "print(f\"Original shape: {df_missing.shape}\")\n", + "print(f\"After dropping any missing: {df_drop_any.shape}\")\n", + "print(f\"Rows removed: {len(df_missing) - len(df_drop_any)} ({(len(df_missing) - len(df_drop_any))/len(df_missing)*100:.1f}%)\")\n", + "\n", + "# Drop rows with missing values in specific columns\n", + "critical_columns = ['customer_id', 'age', 'region']\n", + "df_drop_critical = df_missing.dropna(subset=critical_columns)\n", + "print(f\"\\nAfter dropping rows missing critical columns: {df_drop_critical.shape}\")\n", + "\n", + "# Drop rows with more than X missing values\n", + "df_drop_thresh = df_missing.dropna(thresh=len(df_missing.columns) - 2) # Allow max 2 missing\n", + "print(f\"After dropping rows with >2 missing values: {df_drop_thresh.shape}\")\n", + "\n", + "# Drop columns with too many missing values\n", + "missing_threshold = 0.5 # 50%\n", + "cols_to_keep = df_missing.columns[df_missing.isnull().mean() < missing_threshold]\n", + "df_drop_cols = df_missing[cols_to_keep]\n", + "print(f\"\\nAfter dropping columns with >{missing_threshold*100}% missing: {df_drop_cols.shape}\")\n", + "print(f\"Columns dropped: {set(df_missing.columns) - set(cols_to_keep)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 2: Basic imputation with fillna()\n", + "print(\"=== BASIC IMPUTATION ===\")\n", + "\n", + "df_basic_impute = df_missing.copy()\n", + "\n", + "# Fill with specific values\n", + "df_basic_impute['satisfaction_score'] = df_basic_impute['satisfaction_score'].fillna(3) # Neutral score\n", + "print(\"Filled satisfaction_score with 3 (neutral)\")\n", + "\n", + "# Fill with 
statistical measures\n", + "df_basic_impute['income'] = df_basic_impute['income'].fillna(df_basic_impute['income'].median())\n", + "df_basic_impute['education_years'] = df_basic_impute['education_years'].fillna(df_basic_impute['education_years'].mean())\n", + "df_basic_impute['purchase_amount'] = df_basic_impute['purchase_amount'].fillna(df_basic_impute['purchase_amount'].mean())\n", + "print(\"Filled numerical columns with mean/median\")\n", + "\n", + "# Backward fill for dates (only bfill is applied; trailing missing dates have no later value to copy and stay missing)\n", + "df_basic_impute['last_purchase_date'] = df_basic_impute['last_purchase_date'].fillna(method='bfill')\n", + "print(\"Filled dates with backward fill\")\n", + "\n", + "print(f\"\\nMissing values after basic imputation:\")\n", + "print(df_basic_impute.isnull().sum().sum())\n", + "\n", + "# Show before/after comparison\n", + "print(\"\\nComparison (first 10 rows):\")\n", + "comparison_cols = ['income', 'education_years', 'purchase_amount', 'satisfaction_score']\n", + "for col in comparison_cols:\n", + " before_missing = df_missing[col].isnull().sum()\n", + " after_missing = df_basic_impute[col].isnull().sum()\n", + " print(f\"{col}: {before_missing} → {after_missing} missing values\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Advanced Imputation Techniques\n", + "\n", + "Sophisticated methods for handling missing data."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Group-based imputation\n", + "def group_based_imputation(df):\n", + " \"\"\"Impute missing values based on group statistics\"\"\"\n", + " df_group_impute = df.copy()\n", + " \n", + " print(\"=== GROUP-BASED IMPUTATION ===\")\n", + " \n", + " # Impute income based on region and education level\n", + " # First, create education level categories\n", + " df_group_impute['education_level'] = pd.cut(\n", + " df_group_impute['education_years'].fillna(df_group_impute['education_years'].median()),\n", + " bins=[0, 12, 16, 20],\n", + " labels=['High School', 'Bachelor', 'Advanced']\n", + " )\n", + " \n", + " # Calculate group-based statistics\n", + " income_by_group = df_group_impute.groupby(['region', 'education_level'])['income'].median()\n", + " \n", + " # Fill missing income values\n", + " def fill_income(row):\n", + " if pd.isna(row['income']):\n", + " try:\n", + " return income_by_group.loc[(row['region'], row['education_level'])]\n", + " except KeyError:\n", + " return df_group_impute['income'].median()\n", + " return row['income']\n", + " \n", + " df_group_impute['income'] = df_group_impute.apply(fill_income, axis=1)\n", + " \n", + " print(\"Income imputed based on region and education level\")\n", + " print(\"Group-based median income:\")\n", + " print(income_by_group.round(0))\n", + " \n", + " return df_group_impute\n", + "\n", + "# Apply group-based imputation\n", + "df_group_imputed = group_based_imputation(df_missing)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Comparison of Imputation Methods\n", + "\n", + "Compare different imputation approaches and their impact." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compare_imputation_methods(original_complete, original_missing, *imputed_dfs, methods_names):\n", + " \"\"\"Compare different imputation methods\"\"\"\n", + " print(\"=== IMPUTATION METHODS COMPARISON ===\")\n", + " \n", + " # Focus on a specific column for comparison\n", + " column = 'income'\n", + " \n", + " if column not in original_complete.columns:\n", + " print(f\"Column {column} not found\")\n", + " return\n", + " \n", + " # Get original values that were made missing\n", + " missing_mask = original_missing[column].isnull()\n", + " true_values = original_complete.loc[missing_mask, column]\n", + " \n", + " print(f\"Comparing imputation for '{column}' column\")\n", + " print(f\"Number of missing values: {len(true_values)}\")\n", + " \n", + " # Calculate errors for each method\n", + " results = {}\n", + " \n", + " for df_imputed, method_name in zip(imputed_dfs, methods_names):\n", + " if column in df_imputed.columns:\n", + " imputed_values = df_imputed.loc[missing_mask, column]\n", + " \n", + " # Calculate metrics\n", + " mae = np.mean(np.abs(true_values - imputed_values))\n", + " rmse = np.sqrt(np.mean((true_values - imputed_values) ** 2))\n", + " bias = np.mean(imputed_values - true_values)\n", + " \n", + " results[method_name] = {\n", + " 'MAE': mae,\n", + " 'RMSE': rmse,\n", + " 'Bias': bias,\n", + " 'Mean_Imputed': np.mean(imputed_values),\n", + " 'Std_Imputed': np.std(imputed_values)\n", + " }\n", + " \n", + " # True statistics\n", + " print(f\"\\nTrue statistics for missing values:\")\n", + " print(f\"Mean: {np.mean(true_values):.2f}\")\n", + " print(f\"Std: {np.std(true_values):.2f}\")\n", + " \n", + " # Results comparison\n", + " results_df = pd.DataFrame(results).T\n", + " print(f\"\\nImputation comparison results:\")\n", + " print(results_df.round(2))\n", + " \n", + " # Visualize comparison\n", + " fig, axes = plt.subplots(2, 2, 
figsize=(15, 10))\n", + " \n", + " # Distribution comparison\n", + " axes[0, 0].hist(true_values, alpha=0.7, label='True Values', bins=20)\n", + " for df_imputed, method_name in zip(imputed_dfs, methods_names):\n", + " if column in df_imputed.columns:\n", + " imputed_values = df_imputed.loc[missing_mask, column]\n", + " axes[0, 0].hist(imputed_values, alpha=0.7, label=f'{method_name}', bins=20)\n", + " axes[0, 0].set_title('Distribution Comparison')\n", + " axes[0, 0].legend()\n", + " \n", + " # Error metrics\n", + " metrics = ['MAE', 'RMSE']\n", + " for i, metric in enumerate(metrics):\n", + " values = [results[method][metric] for method in results.keys()]\n", + " axes[0, 1].bar(range(len(values)), values, alpha=0.7)\n", + " axes[0, 1].set_xticks(range(len(results)))\n", + " axes[0, 1].set_xticklabels(list(results.keys()), rotation=45)\n", + " axes[0, 1].set_title(f'{metric} Comparison')\n", + " break # Show only MAE for now\n", + " \n", + " # Scatter plot: True vs Imputed\n", + " for i, (df_imputed, method_name) in enumerate(zip(imputed_dfs[:2], methods_names[:2])):\n", + " if column in df_imputed.columns:\n", + " imputed_values = df_imputed.loc[missing_mask, column]\n", + " ax = axes[1, i]\n", + " ax.scatter(true_values, imputed_values, alpha=0.6)\n", + " ax.plot([true_values.min(), true_values.max()], \n", + " [true_values.min(), true_values.max()], 'r--', label='Perfect Prediction')\n", + " ax.set_xlabel('True Values')\n", + " ax.set_ylabel('Imputed Values')\n", + " ax.set_title(f'{method_name}: True vs Imputed')\n", + " ax.legend()\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " return results_df\n", + "\n", + "# Compare methods\n", + "comparison_results = compare_imputation_methods(\n", + " df_complete, \n", + " df_missing,\n", + " df_basic_impute,\n", + " methods_names=['Basic Fill', 'KNN', 'Iterative']\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Domain-Specific Imputation Strategies\n", + "\n", + "Business logic-driven approaches to missing data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def business_logic_imputation(df):\n", + " \"\"\"Apply business logic for missing value imputation\"\"\"\n", + " print(\"=== BUSINESS LOGIC IMPUTATION ===\")\n", + " \n", + " df_business = df.copy()\n", + " \n", + " # 1. Income imputation based on age and education\n", + " def estimate_income(row):\n", + " if pd.notna(row['income']):\n", + " return row['income']\n", + " \n", + " # Base income estimation\n", + " base_income = 30000\n", + " \n", + " # Age factor (experience premium)\n", + " if pd.notna(row['age']):\n", + " if row['age'] > 40:\n", + " base_income *= 1.5\n", + " elif row['age'] > 30:\n", + " base_income *= 1.2\n", + " \n", + " # Education factor\n", + " if pd.notna(row['education_years']):\n", + " if row['education_years'] > 16: # Graduate degree\n", + " base_income *= 1.8\n", + " elif row['education_years'] > 12: # Bachelor's\n", + " base_income *= 1.4\n", + " \n", + " # Regional adjustment\n", + " regional_multipliers = {\n", + " 'North': 1.2, # Higher cost of living\n", + " 'South': 0.9,\n", + " 'East': 1.1,\n", + " 'West': 1.0\n", + " }\n", + " base_income *= regional_multipliers.get(row['region'], 1.0)\n", + " \n", + " return base_income\n", + " \n", + " # Apply income estimation\n", + " df_business['income'] = df_business.apply(estimate_income, axis=1)\n", + " \n", + " # 2. 
Satisfaction score based on purchase behavior\n", + " def estimate_satisfaction(row):\n", + " if pd.notna(row['satisfaction_score']):\n", + " return row['satisfaction_score']\n", + " \n", + " # Base satisfaction\n", + " base_satisfaction = 3 # Neutral\n", + " \n", + " # Purchase amount influence\n", + " if pd.notna(row['purchase_amount']):\n", + " if row['purchase_amount'] > 250: # High value purchase\n", + " base_satisfaction = 4\n", + " elif row['purchase_amount'] < 100: # Low value might indicate dissatisfaction\n", + " base_satisfaction = 2\n", + " \n", + " return base_satisfaction\n", + " \n", + " # Apply satisfaction estimation\n", + " df_business['satisfaction_score'] = df_business.apply(estimate_satisfaction, axis=1)\n", + " \n", + " # 3. Education years based on income and age\n", + " def estimate_education(row):\n", + " if pd.notna(row['education_years']):\n", + " return row['education_years']\n", + " \n", + " # Base education\n", + " base_education = 12 # High school\n", + " \n", + " # Income-based estimation\n", + " if pd.notna(row['income']):\n", + " if row['income'] > 70000:\n", + " base_education = 18 # Graduate level\n", + " elif row['income'] > 45000:\n", + " base_education = 16 # Bachelor's\n", + " elif row['income'] > 35000:\n", + " base_education = 14 # Some college\n", + " \n", + " # Age adjustment (older people might have different education patterns)\n", + " if pd.notna(row['age']) and row['age'] > 55:\n", + " base_education = max(12, base_education - 2) # Lower average for older generation\n", + " \n", + " return base_education\n", + " \n", + " # Apply education estimation\n", + " df_business['education_years'] = df_business.apply(estimate_education, axis=1)\n", + " \n", + " print(\"Business logic imputation completed\")\n", + " print(f\"Missing values remaining: {df_business.isnull().sum().sum()}\")\n", + " \n", + " return df_business\n", + "\n", + "# Apply business logic imputation\n", + "df_business_imputed = 
business_logic_imputation(df_missing)\n", + "\n", + "print(\"\\nBusiness logic imputation summary:\")\n", + "for col in ['income', 'satisfaction_score', 'education_years']:\n", + " before = df_missing[col].isnull().sum()\n", + " after = df_business_imputed[col].isnull().sum()\n", + " print(f\"{col}: {before} → {after} missing values\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Missing Data Flags and Indicators\n", + "\n", + "Track which values were imputed for transparency and analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_missing_indicators(df_original, df_imputed):\n", + " \"\"\"Create indicator variables for missing data\"\"\"\n", + " print(\"=== CREATING MISSING DATA INDICATORS ===\")\n", + " \n", + " df_with_indicators = df_imputed.copy()\n", + " \n", + " # Create indicator columns for each column that had missing data\n", + " columns_with_missing = df_original.columns[df_original.isnull().any()].tolist()\n", + " \n", + " for col in columns_with_missing:\n", + " indicator_col = f'{col}_was_missing'\n", + " df_with_indicators[indicator_col] = df_original[col].isnull().astype(int)\n", + " \n", + " print(f\"Created {len(columns_with_missing)} missing data indicators\")\n", + " print(f\"Indicator columns: {[f'{col}_was_missing' for col in columns_with_missing]}\")\n", + " \n", + " # Summary of missing patterns\n", + " indicator_cols = [f'{col}_was_missing' for col in columns_with_missing]\n", + " missing_patterns = df_with_indicators[indicator_cols].sum()\n", + " \n", + " print(\"\\nMissing data summary by column:\")\n", + " for col, count in missing_patterns.items():\n", + " original_col = col.replace('_was_missing', '')\n", + " percentage = (count / len(df_with_indicators)) * 100\n", + " print(f\"{original_col}: {count} values imputed ({percentage:.1f}%)\")\n", + " \n", + " # Create composite missing indicator\n", + " 
df_with_indicators['total_missing_count'] = df_with_indicators[indicator_cols].sum(axis=1)\n", + " df_with_indicators['has_any_missing'] = (df_with_indicators['total_missing_count'] > 0).astype(int)\n", + " \n", + " return df_with_indicators, indicator_cols\n", + "\n", + "# Create missing indicators\n", + "df_with_indicators, indicator_columns = create_missing_indicators(df_missing, df_business_imputed)\n", + "\n", + "print(\"\\nDataset with missing indicators:\")\n", + "sample_cols = ['income', 'income_was_missing', 'education_years', 'education_years_was_missing', \n", + " 'satisfaction_score', 'satisfaction_score_was_missing', 'total_missing_count']\n", + "available_cols = [col for col in sample_cols if col in df_with_indicators.columns]\n", + "print(df_with_indicators[available_cols].head(10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Validation and Quality Assessment\n", + "\n", + "Validate the quality of imputation results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def validate_imputation_quality(df_original, df_missing, df_imputed):\n", + " \"\"\"Validate the quality of imputation\"\"\"\n", + " print(\"=== IMPUTATION QUALITY VALIDATION ===\")\n", + " \n", + " validation_results = {}\n", + " \n", + " # Check each column that had missing data\n", + " for col in df_missing.columns:\n", + " if df_missing[col].isnull().any() and col in df_imputed.columns:\n", + " print(f\"\\n--- Validating {col} ---\")\n", + " \n", + " # Get missing mask\n", + " missing_mask = df_missing[col].isnull()\n", + " \n", + " # Original statistics (complete data)\n", + " original_stats = df_original[col].describe()\n", + " \n", + " # Imputed statistics (only imputed values)\n", + " if missing_mask.any():\n", + " imputed_values = df_imputed.loc[missing_mask, col]\n", + " \n", + " if pd.api.types.is_numeric_dtype(df_original[col]):\n", + " imputed_stats = 
imputed_values.describe()\n", + " \n", + " # Statistical tests\n", + " mean_diff = abs(original_stats['mean'] - imputed_stats['mean'])\n", + " std_diff = abs(original_stats['std'] - imputed_stats['std'])\n", + " \n", + " validation_results[col] = {\n", + " 'original_mean': original_stats['mean'],\n", + " 'imputed_mean': imputed_stats['mean'],\n", + " 'mean_difference': mean_diff,\n", + " 'original_std': original_stats['std'],\n", + " 'imputed_std': imputed_stats['std'],\n", + " 'std_difference': std_diff,\n", + " 'values_imputed': len(imputed_values)\n", + " }\n", + " \n", + " print(f\"Original mean: {original_stats['mean']:.2f}, Imputed mean: {imputed_stats['mean']:.2f}\")\n", + " print(f\"Mean difference: {mean_diff:.2f} ({mean_diff/original_stats['mean']*100:.1f}%)\")\n", + " print(f\"Original std: {original_stats['std']:.2f}, Imputed std: {imputed_stats['std']:.2f}\")\n", + " \n", + " else:\n", + " # Categorical data\n", + " original_dist = df_original[col].value_counts(normalize=True)\n", + " imputed_dist = imputed_values.value_counts(normalize=True)\n", + " print(f\"Original distribution: {original_dist.to_dict()}\")\n", + " print(f\"Imputed distribution: {imputed_dist.to_dict()}\")\n", + " \n", + " # Overall validation summary\n", + " if validation_results:\n", + " validation_df = pd.DataFrame(validation_results).T\n", + " print(\"\\n=== VALIDATION SUMMARY ===\")\n", + " print(validation_df.round(3))\n", + " \n", + " # Flag potential issues\n", + " print(\"\\n--- Potential Issues ---\")\n", + " for col, stats in validation_results.items():\n", + " mean_change = abs(stats['mean_difference'] / stats['original_mean']) * 100\n", + " if mean_change > 10: # More than 10% change in mean\n", + " print(f\"⚠️ {col}: Large mean change ({mean_change:.1f}%)\")\n", + " \n", + " std_change = abs(stats['std_difference'] / stats['original_std']) * 100\n", + " if std_change > 20: # More than 20% change in std\n", + " print(f\"⚠️ {col}: Large variance change 
({std_change:.1f}%)\")\n", + " \n", + " return validation_results\n", + "\n", + "# Validate imputation quality\n", + "validation_results = validate_imputation_quality(df_complete, df_missing, df_business_imputed)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply missing data handling techniques to challenging scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Multi-step imputation strategy\n", + "# Create a sophisticated imputation pipeline that:\n", + "# 1. Handles different types of missing data appropriately\n", + "# 2. Uses multiple imputation methods in sequence\n", + "# 3. Validates results at each step\n", + "# 4. Creates comprehensive documentation\n", + "\n", + "def comprehensive_imputation_pipeline(df):\n", + " \"\"\"Comprehensive missing data handling pipeline\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# result_df = comprehensive_imputation_pipeline(df_missing)\n", + "# print(\"Comprehensive pipeline results:\")\n", + "# print(result_df.isnull().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Missing data pattern analysis\n", + "# Analyze if missing data follows specific patterns:\n", + "# - Time-based patterns\n", + "# - User behavior patterns\n", + "# - System/technical patterns\n", + "# Create insights and recommendations\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Impact assessment\n", + "# Assess how different missing data handling approaches\n", + "# affect downstream analysis:\n", + "# - Statistical analysis results\n", + "# - Machine learning model performance\n", + "# - Business insights and decisions\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Understanding Missing Data Types**:\n", + " - **MCAR**: Missing Completely at Random\n", + " - **MAR**: Missing at Random (depends on observed data)\n", + " - **MNAR**: Missing Not at Random (depends on unobserved data)\n", + "\n", + "2. **Detection and Analysis**:\n", + " - Always analyze missing patterns before imputation\n", + " - Use visualizations to understand missing data structure\n", + " - Look for relationships between missing values and other variables\n", + "\n", + "3. **Handling Strategies**:\n", + " - **Deletion**: Simple but can lose valuable information\n", + " - **Simple Imputation**: Fast but may not preserve relationships\n", + " - **Advanced Methods**: KNN, MICE preserve more complex relationships\n", + " - **Business Logic**: Domain knowledge often provides best results\n", + "\n", + "4. **Best Practices**:\n", + " - Create missing data indicators for transparency\n", + " - Validate imputation quality against original data when possible\n", + " - Consider the impact on downstream analysis\n", + " - Document all imputation decisions and methods\n", + "\n", + "## Method Selection Guide\n", + "\n", + "| Scenario | Recommended Method | Rationale |\n", + "|----------|-------------------|----------|\n", + "| < 5% missing, MCAR | Simple imputation | Low impact, efficiency |\n", + "| 5-20% missing, MAR | KNN or Group-based | Preserve relationships |\n", + "| > 20% missing, complex patterns | MICE or Multiple imputation | Handle complex dependencies |\n", + "| Business-critical decisions | Domain knowledge + validation | Accuracy and explainability |\n", + "| Machine learning features | Advanced methods + indicators | Preserve predictive power |\n", + "\n", + "## Common Pitfalls to Avoid\n", + "\n", + "1. **Data Leakage**: Don't use future information to impute past values\n", + "2. **Ignoring Patterns**: Missing data often has meaningful patterns\n", + "3. 
**Over-imputation**: Sometimes missing data is informative itself\n", + "4. **One-size-fits-all**: Different columns may need different strategies\n", + "5. **No Validation**: Always check if imputation preserved data characteristics" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/07_merging_joining.ipynb b/Session_01/PandasDataFrame-exmples/07_merging_joining.ipynb new file mode 100755 index 0000000..0af7c16 --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/07_merging_joining.ipynb @@ -0,0 +1,937 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 7: Merging and Joining DataFrames\n", + "\n", + "## Learning Objectives\n", + "- Master different types of joins (inner, outer, left, right)\n", + "- Understand when to use merge vs join vs concat\n", + "- Handle duplicate keys and join conflicts\n", + "- Learn advanced merging techniques and best practices\n", + "- Practice with real-world data integration scenarios\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-6\n", + "- Understanding of relational database concepts (helpful)\n", + "- Basic knowledge of SQL joins (helpful but not required)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime, timedelta\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display 
options\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 50)\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Sample Datasets\n", + "\n", + "Let's create realistic datasets that represent common business scenarios." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create sample datasets for merging examples\n", + "np.random.seed(42)\n", + "\n", + "# Customer dataset\n", + "customers_data = {\n", + " 'customer_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n", + " 'customer_name': ['Alice Johnson', 'Bob Smith', 'Charlie Brown', 'Diana Prince', 'Eve Wilson',\n", + " 'Frank Miller', 'Grace Lee', 'Henry Davis', 'Ivy Chen', 'Jack Robinson'],\n", + " 'email': ['alice@email.com', 'bob@email.com', 'charlie@email.com', 'diana@email.com', 'eve@email.com',\n", + " 'frank@email.com', 'grace@email.com', 'henry@email.com', 'ivy@email.com', 'jack@email.com'],\n", + " 'age': [28, 35, 42, 31, 29, 45, 38, 33, 27, 41],\n", + " 'city': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix',\n", + " 'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose'],\n", + " 'signup_date': pd.date_range('2023-01-01', periods=10, freq='M')\n", + "}\n", + "\n", + "df_customers = pd.DataFrame(customers_data)\n", + "\n", + "# Orders dataset\n", + "orders_data = {\n", + " 'order_id': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112],\n", + " 'customer_id': [1, 2, 1, 3, 4, 2, 5, 1, 6, 11, 3, 2], # Note: customer_id 11 doesn't exist in customers\n", + " 'order_date': pd.date_range('2023-06-01', periods=12, freq='W'),\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Laptop', 'Monitor', 'Phone', \n", + " 'Headphones', 'Mouse', 'Keyboard', 'Laptop', 'Tablet', 'Monitor'],\n", + " 'quantity': [1, 2, 1, 1, 1, 1, 3, 2, 1, 1, 2, 1],\n", + " 'amount': [1200, 800, 400, 1200, 300, 800, 150, 50, 
75, 1200, 800, 300]\n", + "}\n", + "\n", + "df_orders = pd.DataFrame(orders_data)\n", + "\n", + "# Product information dataset\n", + "products_data = {\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones', 'Mouse', 'Keyboard', 'Webcam'],\n", + " 'category': ['Electronics', 'Electronics', 'Electronics', 'Electronics', \n", + " 'Audio', 'Accessories', 'Accessories', 'Electronics'],\n", + " 'price': [1200, 800, 400, 300, 150, 50, 75, 100],\n", + " 'supplier': ['TechCorp', 'MobileCorp', 'TechCorp', 'DisplayCorp', \n", + " 'AudioCorp', 'AccessoryCorp', 'AccessoryCorp', 'TechCorp']\n", + "}\n", + "\n", + "df_products = pd.DataFrame(products_data)\n", + "\n", + "# Customer segments dataset\n", + "segments_data = {\n", + " 'customer_id': [1, 2, 3, 4, 5, 6, 7, 8, 12, 13], # Some customers not in main customer table\n", + " 'segment': ['Premium', 'Standard', 'Premium', 'Standard', 'Basic', \n", + " 'Premium', 'Standard', 'Basic', 'Premium', 'Standard'],\n", + " 'loyalty_points': [1500, 800, 1200, 600, 200, 1800, 750, 300, 2000, 900]\n", + "}\n", + "\n", + "df_segments = pd.DataFrame(segments_data)\n", + "\n", + "print(\"Sample datasets created:\")\n", + "print(f\"Customers: {df_customers.shape}\")\n", + "print(f\"Orders: {df_orders.shape}\")\n", + "print(f\"Products: {df_products.shape}\")\n", + "print(f\"Segments: {df_segments.shape}\")\n", + "\n", + "print(\"\\nCustomers dataset:\")\n", + "print(df_customers.head())\n", + "\n", + "print(\"\\nOrders dataset:\")\n", + "print(df_orders.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Basic Merge Operations\n", + "\n", + "Understanding the fundamental merge operations and join types." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inner Join - only matching records\n", + "print(\"=== INNER JOIN ===\")\n", + "inner_join = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "print(f\"Result shape: {inner_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(inner_join[['customer_name', 'order_id', 'product', 'amount']].head())\n", + "\n", + "print(f\"\\nUnique customers in result: {inner_join['customer_id'].nunique()}\")\n", + "print(f\"Total orders: {len(inner_join)}\")\n", + "\n", + "# Check which customers have orders\n", + "customers_with_orders = inner_join['customer_id'].unique()\n", + "print(f\"Customers with orders: {sorted(customers_with_orders)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Left Join - all records from left table\n", + "print(\"=== LEFT JOIN ===\")\n", + "left_join = pd.merge(df_customers, df_orders, on='customer_id', how='left')\n", + "print(f\"Result shape: {left_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(left_join[['customer_name', 'order_id', 'product', 'amount']].head(10))\n", + "\n", + "# Check customers without orders\n", + "customers_without_orders = left_join[left_join['order_id'].isnull()]['customer_name'].tolist()\n", + "print(f\"\\nCustomers without orders: {customers_without_orders}\")\n", + "\n", + "# Summary statistics\n", + "print(f\"\\nTotal records: {len(left_join)}\")\n", + "print(f\"Records with orders: {left_join['order_id'].notna().sum()}\")\n", + "print(f\"Records without orders: {left_join['order_id'].isnull().sum()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Right Join - all records from right table\n", + "print(\"=== RIGHT JOIN ===\")\n", + "right_join = pd.merge(df_customers, df_orders, on='customer_id', how='right')\n", + 
"print(f\"Result shape: {right_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(right_join[['customer_name', 'order_id', 'product', 'amount']].head())\n", + "\n", + "# Check orders without customer information\n", + "orders_without_customers = right_join[right_join['customer_name'].isnull()]\n", + "print(f\"\\nOrders without customer info: {len(orders_without_customers)}\")\n", + "if len(orders_without_customers) > 0:\n", + " print(orders_without_customers[['customer_id', 'order_id', 'product', 'amount']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Outer Join - all records from both tables\n", + "print(\"=== OUTER JOIN ===\")\n", + "outer_join = pd.merge(df_customers, df_orders, on='customer_id', how='outer')\n", + "print(f\"Result shape: {outer_join.shape}\")\n", + "\n", + "# Analyze the result\n", + "print(\"\\nData quality analysis:\")\n", + "print(f\"Records with complete customer info: {outer_join['customer_name'].notna().sum()}\")\n", + "print(f\"Records with complete order info: {outer_join['order_id'].notna().sum()}\")\n", + "print(f\"Records with both customer and order info: {(outer_join['customer_name'].notna() & outer_join['order_id'].notna()).sum()}\")\n", + "\n", + "# Show different categories of records\n", + "print(\"\\nCustomers without orders:\")\n", + "customers_only = outer_join[(outer_join['customer_name'].notna()) & (outer_join['order_id'].isnull())]\n", + "print(customers_only[['customer_name', 'city']].drop_duplicates())\n", + "\n", + "print(\"\\nOrders without customer data:\")\n", + "orders_only = outer_join[(outer_join['customer_name'].isnull()) & (outer_join['order_id'].notna())]\n", + "print(orders_only[['customer_id', 'order_id', 'product', 'amount']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Multiple Table Joins\n", + "\n", + "Combining data from multiple sources in sequence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Three-way join: Customers + Orders + Products\n", + "print(\"=== THREE-WAY JOIN ===\")\n", + "\n", + "# Step 1: Join customers and orders\n", + "customer_orders = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "print(f\"After joining customers and orders: {customer_orders.shape}\")\n", + "\n", + "# Step 2: Join with products\n", + "complete_data = pd.merge(customer_orders, df_products, on='product', how='left')\n", + "print(f\"After joining with products: {complete_data.shape}\")\n", + "\n", + "# Display comprehensive view\n", + "print(\"\\nComplete order information:\")\n", + "display_cols = ['customer_name', 'order_id', 'product', 'category', 'quantity', 'amount', 'price', 'supplier']\n", + "print(complete_data[display_cols].head())\n", + "\n", + "# Verify data consistency\n", + "print(\"\\nData consistency check:\")\n", + "# Check if order amount matches product price * quantity\n", + "complete_data['calculated_amount'] = complete_data['price'] * complete_data['quantity']\n", + "amount_matches = (complete_data['amount'] == complete_data['calculated_amount']).all()\n", + "print(f\"Order amounts match calculated amounts: {amount_matches}\")\n", + "\n", + "if not amount_matches:\n", + " mismatched = complete_data[complete_data['amount'] != complete_data['calculated_amount']]\n", + " print(f\"\\nMismatched records: {len(mismatched)}\")\n", + " print(mismatched[['order_id', 'product', 'amount', 'calculated_amount']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add customer segment information\n", + "print(\"=== ADDING CUSTOMER SEGMENTS ===\")\n", + "\n", + "# Join with segments (left join to keep all customers)\n", + "customers_with_segments = pd.merge(df_customers, df_segments, on='customer_id', how='left')\n", + "print(f\"Customers with segments shape: 
{customers_with_segments.shape}\")\n", + "\n", + "# Check which customers don't have segment information\n", + "missing_segments = customers_with_segments[customers_with_segments['segment'].isnull()]\n", + "print(f\"\\nCustomers without segment info: {len(missing_segments)}\")\n", + "if len(missing_segments) > 0:\n", + " print(missing_segments[['customer_name', 'city']])\n", + "\n", + "# Create comprehensive customer profile\n", + "full_customer_profile = pd.merge(complete_data, df_segments, on='customer_id', how='left')\n", + "print(f\"\\nFull customer profile shape: {full_customer_profile.shape}\")\n", + "\n", + "# Analyze by segment\n", + "segment_analysis = full_customer_profile.groupby('segment').agg({\n", + " 'amount': ['sum', 'mean', 'count'],\n", + " 'customer_id': 'nunique'\n", + "}).round(2)\n", + "segment_analysis.columns = ['Total_Revenue', 'Avg_Order_Value', 'Total_Orders', 'Unique_Customers']\n", + "print(\"\\nRevenue by customer segment:\")\n", + "print(segment_analysis)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Advanced Merge Techniques\n", + "\n", + "Handling complex merging scenarios and edge cases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge with different column names\n", + "print(\"=== MERGE WITH DIFFERENT COLUMN NAMES ===\")\n", + "\n", + "# Create a dataset with different column name\n", + "customer_demographics = pd.DataFrame({\n", + " 'cust_id': [1, 2, 3, 4, 5],\n", + " 'income_range': ['50-75k', '75-100k', '50-75k', '100k+', '25-50k'],\n", + " 'education': ['Bachelor', 'Master', 'PhD', 'Master', 'Bachelor'],\n", + " 'occupation': ['Engineer', 'Manager', 'Professor', 'Director', 'Analyst']\n", + "})\n", + "\n", + "# Merge using left_on and right_on parameters\n", + "customers_with_demographics = pd.merge(\n", + " df_customers, \n", + " customer_demographics, \n", + " left_on='customer_id', \n", + " right_on='cust_id', \n", + " how='left'\n", + ")\n", + "\n", + "print(\"Merge with different column names:\")\n", + "print(customers_with_demographics[['customer_name', 'customer_id', 'cust_id', 'income_range', 'education']].head())\n", + "\n", + "# Clean up duplicate columns\n", + "customers_with_demographics = customers_with_demographics.drop('cust_id', axis=1)\n", + "print(f\"\\nAfter cleanup: {customers_with_demographics.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Merge on multiple columns\n", + "print(\"=== MERGE ON MULTIPLE COLUMNS ===\")\n", + "\n", + "# Create time-based pricing data\n", + "pricing_data = pd.DataFrame({\n", + " 'product': ['Laptop', 'Laptop', 'Phone', 'Phone', 'Tablet', 'Tablet'],\n", + " 'date': pd.to_datetime(['2023-06-01', '2023-08-01', '2023-06-01', '2023-08-01', '2023-06-01', '2023-08-01']),\n", + " 'price': [1200, 1100, 800, 750, 400, 380],\n", + " 'promotion': [False, True, False, True, False, True]\n", + "})\n", + "\n", + "# Add year-month to orders for matching\n", + "df_orders_with_period = df_orders.copy()\n", + "df_orders_with_period['order_month'] = 
df_orders_with_period['order_date'].dt.to_period('M').dt.start_time\n", + "\n", + "# Create matching periods in pricing data\n", + "pricing_data['period'] = pricing_data['date'].dt.to_period('M').dt.start_time\n", + "\n", + "# Merge on product and time period\n", + "orders_with_pricing = pd.merge(\n", + " df_orders_with_period,\n", + " pricing_data,\n", + " left_on=['product', 'order_month'],\n", + " right_on=['product', 'period'],\n", + " how='left'\n", + ")\n", + "\n", + "print(\"Orders with time-based pricing:\")\n", + "print(orders_with_pricing[['order_id', 'product', 'order_date', 'amount', 'price', 'promotion']].head())\n", + "\n", + "# Check for pricing discrepancies\n", + "pricing_discrepancies = orders_with_pricing[\n", + " (orders_with_pricing['amount'] != orders_with_pricing['price'] * orders_with_pricing['quantity']) &\n", + " orders_with_pricing['price'].notna()\n", + "]\n", + "print(f\"\\nOrders with pricing discrepancies: {len(pricing_discrepancies)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Handling duplicate keys in merge\n", + "print(\"=== HANDLING DUPLICATE KEYS ===\")\n", + "\n", + "# Create data with duplicate keys\n", + "customer_contacts = pd.DataFrame({\n", + " 'customer_id': [1, 1, 2, 2, 3],\n", + " 'contact_type': ['email', 'phone', 'email', 'phone', 'email'],\n", + " 'contact_value': ['alice@email.com', '555-0101', 'bob@email.com', '555-0102', 'charlie@email.com'],\n", + " 'is_primary': [True, False, True, True, True]\n", + "})\n", + "\n", + "print(\"Customer contacts with duplicates:\")\n", + "print(customer_contacts)\n", + "\n", + "# Merge will create cartesian product for duplicate keys\n", + "customers_with_contacts = pd.merge(df_customers, customer_contacts, on='customer_id', how='inner')\n", + "print(f\"\\nResult of merge with duplicates: {customers_with_contacts.shape}\")\n", + "print(customers_with_contacts[['customer_name', 'contact_type', 
'contact_value', 'is_primary']].head())\n", + "\n", + "# Strategy 1: Filter before merge\n", + "primary_contacts = customer_contacts[customer_contacts['is_primary'] == True]\n", + "customers_primary_contacts = pd.merge(df_customers, primary_contacts, on='customer_id', how='left')\n", + "print(f\"\\nAfter filtering to primary contacts: {customers_primary_contacts.shape}\")\n", + "\n", + "# Strategy 2: Pivot contacts to columns\n", + "contacts_pivoted = customer_contacts.pivot_table(\n", + " index='customer_id',\n", + " columns='contact_type',\n", + " values='contact_value',\n", + " aggfunc='first'\n", + ").reset_index()\n", + "print(\"\\nPivoted contacts:\")\n", + "print(contacts_pivoted)\n", + "\n", + "customers_with_pivoted_contacts = pd.merge(df_customers, contacts_pivoted, on='customer_id', how='left')\n", + "print(f\"\\nAfter merging pivoted contacts: {customers_with_pivoted_contacts.shape}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Index-based Joins\n", + "\n", + "Using DataFrame indices for joining operations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set up DataFrames with indices\n", + "print(\"=== INDEX-BASED JOINS ===\")\n", + "\n", + "# Set customer_id as index\n", + "customers_indexed = df_customers.set_index('customer_id')\n", + "segments_indexed = df_segments.set_index('customer_id')\n", + "\n", + "print(\"Customers with index:\")\n", + "print(customers_indexed.head())\n", + "\n", + "# Join using indices\n", + "joined_by_index = customers_indexed.join(segments_indexed, how='left')\n", + "print(f\"\\nJoined by index shape: {joined_by_index.shape}\")\n", + "print(joined_by_index[['customer_name', 'city', 'segment', 'loyalty_points']].head())\n", + "\n", + "# Compare with merge\n", + "merged_equivalent = pd.merge(df_customers, df_segments, on='customer_id', how='left')\n", + "print(f\"\\nEquivalent merge shape: {merged_equivalent.shape}\")\n", + "\n", + "# Verify they're the same (after sorting)\n", + "joined_sorted = joined_by_index.reset_index().sort_values('customer_id')\n", + "merged_sorted = merged_equivalent.sort_values('customer_id')\n", + "are_equal = joined_sorted.equals(merged_sorted)\n", + "print(f\"Results are identical: {are_equal}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Multi-index joins\n", + "print(\"=== MULTI-INDEX JOINS ===\")\n", + "\n", + "# Create a dataset with multiple index levels\n", + "sales_by_region_product = pd.DataFrame({\n", + " 'region': ['North', 'North', 'South', 'South', 'East', 'East'],\n", + " 'product': ['Laptop', 'Phone', 'Laptop', 'Phone', 'Laptop', 'Phone'],\n", + " 'sales_target': [10, 15, 8, 12, 12, 18],\n", + " 'commission_rate': [0.05, 0.04, 0.06, 0.05, 0.05, 0.04]\n", + "})\n", + "\n", + "# Set multi-index\n", + "sales_targets = sales_by_region_product.set_index(['region', 'product'])\n", + "print(\"Sales targets with multi-index:\")\n", + "print(sales_targets)\n", + 
"\n", + "# Create customer orders with region mapping\n", + "customer_regions = {\n", + " 1: 'North', 2: 'South', 3: 'East', 4: 'North', 5: 'South', 6: 'East'\n", + "}\n", + "\n", + "orders_with_region = df_orders.copy()\n", + "orders_with_region['region'] = orders_with_region['customer_id'].map(customer_regions)\n", + "orders_with_region = orders_with_region.dropna(subset=['region'])\n", + "\n", + "# Merge on multiple columns to match multi-index\n", + "orders_with_targets = pd.merge(\n", + " orders_with_region,\n", + " sales_targets.reset_index(),\n", + " on=['region', 'product'],\n", + " how='left'\n", + ")\n", + "\n", + "print(\"\\nOrders with sales targets:\")\n", + "print(orders_with_targets[['order_id', 'region', 'product', 'amount', 'sales_target', 'commission_rate']].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Concatenation Operations\n", + "\n", + "Combining DataFrames vertically and horizontally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Vertical concatenation (stacking DataFrames)\n", + "print(\"=== VERTICAL CONCATENATION ===\")\n", + "\n", + "# Create additional customer data (new batch)\n", + "new_customers = pd.DataFrame({\n", + " 'customer_id': [11, 12, 13, 14, 15],\n", + " 'customer_name': ['Kate Wilson', 'Liam Brown', 'Mia Garcia', 'Noah Jones', 'Olivia Miller'],\n", + " 'email': ['kate@email.com', 'liam@email.com', 'mia@email.com', 'noah@email.com', 'olivia@email.com'],\n", + " 'age': [26, 39, 31, 44, 28],\n", + " 'city': ['Austin', 'Seattle', 'Denver', 'Boston', 'Miami'],\n", + " 'signup_date': pd.date_range('2024-01-01', periods=5, freq='M')\n", + "})\n", + "\n", + "# Concatenate vertically\n", + "all_customers = pd.concat([df_customers, new_customers], ignore_index=True)\n", + "print(f\"Original customers: {len(df_customers)}\")\n", + "print(f\"New customers: {len(new_customers)}\")\n", + "print(f\"Combined customers: 
{len(all_customers)}\")\n", + "\n", + "print(\"\\nCombined customer data:\")\n", + "print(all_customers.tail())\n", + "\n", + "# Concatenation with different columns\n", + "customers_with_extra_info = pd.DataFrame({\n", + " 'customer_id': [16, 17],\n", + " 'customer_name': ['Paul Davis', 'Quinn Taylor'],\n", + " 'email': ['paul@email.com', 'quinn@email.com'],\n", + " 'age': [35, 29],\n", + " 'city': ['Portland', 'Nashville'],\n", + " 'signup_date': pd.date_range('2024-06-01', periods=2, freq='M'),\n", + " 'referral_source': ['Google', 'Facebook'] # Extra column\n", + "})\n", + "\n", + "# Concat with different columns (creates NaN for missing columns)\n", + "all_customers_extended = pd.concat([all_customers, customers_with_extra_info], ignore_index=True, sort=False)\n", + "print(f\"\\nAfter adding customers with extra info: {all_customers_extended.shape}\")\n", + "print(\"Missing values in referral_source:\")\n", + "print(all_customers_extended['referral_source'].isnull().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Horizontal concatenation\n", + "print(\"=== HORIZONTAL CONCATENATION ===\")\n", + "\n", + "# Split customer data into parts\n", + "customer_basic_info = df_customers[['customer_id', 'customer_name', 'email']]\n", + "customer_demographics = df_customers[['customer_id', 'age', 'city', 'signup_date']]\n", + "\n", + "print(\"Customer basic info:\")\n", + "print(customer_basic_info.head())\n", + "\n", + "print(\"\\nCustomer demographics:\")\n", + "print(customer_demographics.head())\n", + "\n", + "# Concatenate horizontally (by index)\n", + "customers_recombined = pd.concat([customer_basic_info, customer_demographics.drop('customer_id', axis=1)], axis=1)\n", + "print(f\"\\nRecombined shape: {customers_recombined.shape}\")\n", + "print(customers_recombined.head())\n", + "\n", + "# Verify it matches original\n", + "columns_match = set(customers_recombined.columns) == 
set(df_customers.columns)\n", + "print(f\"\\nColumns match original: {columns_match}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Concat with keys (creating hierarchical columns)\n", + "print(\"=== CONCAT WITH KEYS ===\")\n", + "\n", + "# Create quarterly sales data\n", + "q1_sales = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet'],\n", + " 'units_sold': [50, 75, 30],\n", + " 'revenue': [60000, 60000, 12000]\n", + "})\n", + "\n", + "q2_sales = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet'],\n", + " 'units_sold': [45, 80, 35],\n", + " 'revenue': [54000, 64000, 14000]\n", + "})\n", + "\n", + "# Concatenate with keys\n", + "quarterly_sales = pd.concat([q1_sales, q2_sales], keys=['Q1', 'Q2'])\n", + "print(\"Quarterly sales with hierarchical index:\")\n", + "print(quarterly_sales)\n", + "\n", + "# Access specific quarter\n", + "print(\"\\nQ1 sales only:\")\n", + "print(quarterly_sales.loc['Q1'])\n", + "\n", + "# Create summary comparison\n", + "quarterly_comparison = pd.concat([q1_sales.set_index('product'), q2_sales.set_index('product')], \n", + " keys=['Q1', 'Q2'], axis=1)\n", + "print(\"\\nQuarterly comparison (side by side):\")\n", + "print(quarterly_comparison)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Performance and Best Practices\n", + "\n", + "Optimizing merge operations and avoiding common pitfalls." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Performance comparison: merge vs join\n", + "import time\n", + "\n", + "print(\"=== PERFORMANCE COMPARISON ===\")\n", + "\n", + "# Create larger datasets for performance testing\n", + "np.random.seed(42)\n", + "large_customers = pd.DataFrame({\n", + " 'customer_id': range(1, 10001),\n", + " 'customer_name': [f'Customer_{i}' for i in range(1, 10001)],\n", + " 'city': np.random.choice(['New York', 'Los Angeles', 'Chicago'], 10000)\n", + "})\n", + "\n", + "large_orders = pd.DataFrame({\n", + " 'order_id': range(1, 50001),\n", + " 'customer_id': np.random.randint(1, 10001, 50000),\n", + " 'amount': np.random.normal(100, 30, 50000)\n", + "})\n", + "\n", + "print(f\"Large customers: {large_customers.shape}\")\n", + "print(f\"Large orders: {large_orders.shape}\")\n", + "\n", + "# Test merge performance\n", + "start_time = time.time()\n", + "merged_result = pd.merge(large_customers, large_orders, on='customer_id', how='inner')\n", + "merge_time = time.time() - start_time\n", + "\n", + "# Test join performance\n", + "customers_indexed = large_customers.set_index('customer_id')\n", + "orders_indexed = large_orders.set_index('customer_id')\n", + "\n", + "start_time = time.time()\n", + "joined_result = customers_indexed.join(orders_indexed, how='inner')\n", + "join_time = time.time() - start_time\n", + "\n", + "print(f\"\\nMerge time: {merge_time:.4f} seconds\")\n", + "print(f\"Join time: {join_time:.4f} seconds\")\n", + "print(f\"Join is {merge_time/join_time:.2f}x faster\")\n", + "\n", + "print(f\"\\nResults shape - Merge: {merged_result.shape}, Join: {joined_result.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Best practices and common pitfalls\n", + "print(\"=== BEST PRACTICES ===\")\n", + "\n", + "def analyze_merge_keys(df1, df2, key_col):\n", + " \"\"\"Analyze merge keys 
before joining\"\"\"\n", + " print(f\"\\n--- Analyzing merge on '{key_col}' ---\")\n", + " \n", + " # Check for duplicates\n", + " df1_dups = df1[key_col].duplicated().sum()\n", + " df2_dups = df2[key_col].duplicated().sum()\n", + " \n", + " print(f\"Duplicates in left table: {df1_dups}\")\n", + " print(f\"Duplicates in right table: {df2_dups}\")\n", + " \n", + " # Check for missing values\n", + " df1_missing = df1[key_col].isnull().sum()\n", + " df2_missing = df2[key_col].isnull().sum()\n", + " \n", + " print(f\"Missing values in left table: {df1_missing}\")\n", + " print(f\"Missing values in right table: {df2_missing}\")\n", + " \n", + " # Check overlap\n", + " left_keys = set(df1[key_col].dropna())\n", + " right_keys = set(df2[key_col].dropna())\n", + " \n", + " overlap = left_keys & right_keys\n", + " left_only = left_keys - right_keys\n", + " right_only = right_keys - left_keys\n", + " \n", + " print(f\"Keys in both tables: {len(overlap)}\")\n", + " print(f\"Keys only in left: {len(left_only)}\")\n", + " print(f\"Keys only in right: {len(right_only)}\")\n", + " \n", + " # Predict result sizes\n", + " if df1_dups == 0 and df2_dups == 0:\n", + " inner_size = len(overlap)\n", + " left_size = len(df1)\n", + " right_size = len(df2)\n", + " outer_size = len(left_keys | right_keys)\n", + " else:\n", + " print(\"Warning: Duplicates present, result size may be larger than expected\")\n", + " inner_size = \"Cannot predict (duplicates present)\"\n", + " left_size = \"Cannot predict (duplicates present)\"\n", + " right_size = \"Cannot predict (duplicates present)\"\n", + " outer_size = \"Cannot predict (duplicates present)\"\n", + " \n", + " print(f\"\\nPredicted result sizes:\")\n", + " print(f\"Inner join: {inner_size}\")\n", + " print(f\"Left join: {left_size}\")\n", + " print(f\"Right join: {right_size}\")\n", + " print(f\"Outer join: {outer_size}\")\n", + "\n", + "# Analyze our sample data\n", + "analyze_merge_keys(df_customers, df_orders, 'customer_id')\n", + 
"analyze_merge_keys(df_customers, df_segments, 'customer_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Data validation after merge\n", + "def validate_merge_result(df, expected_rows=None, key_col=None):\n", + " \"\"\"Validate merge results\"\"\"\n", + " print(\"\\n=== MERGE VALIDATION ===\")\n", + " \n", + " print(f\"Result shape: {df.shape}\")\n", + " \n", + " if expected_rows:\n", + " print(f\"Expected rows: {expected_rows}\")\n", + " if len(df) != expected_rows:\n", + " print(\"⚠️ Row count doesn't match expectation!\")\n", + " \n", + " # Check for unexpected duplicates\n", + " if key_col and key_col in df.columns:\n", + " duplicates = df[key_col].duplicated().sum()\n", + " if duplicates > 0:\n", + " print(f\"⚠️ Found {duplicates} duplicate keys after merge\")\n", + " \n", + " # Check for missing values in key columns\n", + " missing_summary = df.isnull().sum()\n", + " critical_missing = missing_summary[missing_summary > 0]\n", + " \n", + " if len(critical_missing) > 0:\n", + " print(\"Missing values after merge:\")\n", + " print(critical_missing)\n", + " \n", + " # Data type consistency\n", + " print(f\"\\nData types:\")\n", + " print(df.dtypes)\n", + " \n", + " return df\n", + "\n", + "# Example validation\n", + "sample_merge = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "validated_result = validate_merge_result(sample_merge, key_col='customer_id')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply merging and joining techniques to real-world scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Customer Lifetime Value Analysis\n", + "# Create a comprehensive customer analysis by joining:\n", + "# - Customer demographics\n", + "# - Order history\n", + "# - Product information\n", + "# - Customer 
segments\n", + "# Calculate CLV metrics for each customer\n", + "\n", + "def calculate_customer_lifetime_value(customers, orders, products, segments):\n", + " \"\"\"Calculate comprehensive customer lifetime value metrics\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# clv_analysis = calculate_customer_lifetime_value(df_customers, df_orders, df_products, df_segments)\n", + "# print(\"Customer Lifetime Value Analysis:\")\n", + "# print(clv_analysis.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Data Quality Assessment\n", + "# Create a function that analyzes data quality issues when merging multiple datasets:\n", + "# - Identify orphaned records\n", + "# - Find data inconsistencies\n", + "# - Suggest data cleaning steps\n", + "# - Provide merge recommendations\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Time-series Join Challenge\n", + "# Create a complex time-based join scenario:\n", + "# - Join orders with time-varying product prices\n", + "# - Handle seasonal promotions\n", + "# - Calculate accurate historical revenue\n", + "# - Account for price changes over time\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Join Types**:\n", + " - **Inner**: Only matching records from both tables\n", + " - **Left**: All records from left table + matching from right\n", + " - **Right**: All records from right table + matching from left\n", + " - **Outer**: All records from both tables\n", + "\n", + "2. **Method Selection**:\n", + " - **`pd.merge()`**: Most flexible, works with any columns\n", + " - **`.join()`**: Faster for index-based joins\n", + " - **`pd.concat()`**: For stacking DataFrames vertically/horizontally\n", + "\n", + "3. 
**Best Practices**:\n", + " - Always analyze merge keys before joining\n", + " - Check for duplicates and missing values\n", + " - Validate results after merging\n", + " - Use appropriate join types for your use case\n", + " - Consider performance implications for large datasets\n", + "\n", + "4. **Common Pitfalls**:\n", + " - Cartesian products from duplicate keys\n", + " - Unexpected result sizes\n", + " - Data type inconsistencies\n", + " - Missing value propagation\n", + "\n", + "## Join Type Selection Guide\n", + "\n", + "| Use Case | Recommended Join | Rationale |\n", + "|----------|-----------------|----------|\n", + "| Customer orders analysis | Inner | Only customers with orders |\n", + "| Customer segmentation | Left | Keep all customers, add segment info |\n", + "| Order validation | Right | Keep all orders, check customer validity |\n", + "| Data completeness analysis | Outer | See all records and identify gaps |\n", + "| Performance-critical operations | Index-based join | Faster execution |\n", + "\n", + "## Performance Tips\n", + "\n", + "1. **Index Usage**: Set indexes for frequently joined columns\n", + "2. **Data Types**: Ensure consistent data types before joining\n", + "3. **Memory Management**: Consider chunking for very large datasets\n", + "4. **Join Order**: Start with smallest datasets\n", + "5. 
**Validation**: Always validate merge results" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/08_sorting_ranking.ipynb b/Session_01/PandasDataFrame-exmples/08_sorting_ranking.ipynb new file mode 100755 index 0000000..5e5a9df --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/08_sorting_ranking.ipynb @@ -0,0 +1,1408 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 7: Merging and Joining DataFrames\n", + "\n", + "## Learning Objectives\n", + "- Master different types of joins (inner, outer, left, right)\n", + "- Understand when to use merge vs join vs concat\n", + "- Handle duplicate keys and join conflicts\n", + "- Learn advanced merging techniques and best practices\n", + "- Practice with real-world data integration scenarios\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-6\n", + "- Understanding of relational database concepts (helpful)\n", + "- Basic knowledge of SQL joins (helpful but not required)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Libraries loaded successfully!\n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime, timedelta\n", + "import matplotlib.pyplot as plt\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display options\n", + "pd.set_option('display.max_columns', None)\n", + 
"pd.set_option('display.max_rows', 50)\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Sample Datasets\n", + "\n", + "Let's create realistic datasets that represent common business scenarios." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample datasets created:\n", + "Customers: (10, 6)\n", + "Orders: (12, 6)\n", + "Products: (8, 4)\n", + "Segments: (10, 3)\n", + "\n", + "Customers dataset:\n", + " customer_id customer_name email age city signup_date\n", + "0 1 Alice Johnson alice@email.com 28 New York 2023-01-31\n", + "1 2 Bob Smith bob@email.com 35 Los Angeles 2023-02-28\n", + "2 3 Charlie Brown charlie@email.com 42 Chicago 2023-03-31\n", + "3 4 Diana Prince diana@email.com 31 Houston 2023-04-30\n", + "4 5 Eve Wilson eve@email.com 29 Phoenix 2023-05-31\n", + "\n", + "Orders dataset:\n", + " order_id customer_id order_date product quantity amount\n", + "0 101 1 2023-06-04 Laptop 1 1200\n", + "1 102 2 2023-06-11 Phone 2 800\n", + "2 103 1 2023-06-18 Tablet 1 400\n", + "3 104 3 2023-06-25 Laptop 1 1200\n", + "4 105 4 2023-07-02 Monitor 1 300\n" + ] + } + ], + "source": [ + "# Create sample datasets for merging examples\n", + "np.random.seed(42)\n", + "\n", + "# Customer dataset\n", + "customers_data = {\n", + " 'customer_id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n", + " 'customer_name': ['Alice Johnson', 'Bob Smith', 'Charlie Brown', 'Diana Prince', 'Eve Wilson',\n", + " 'Frank Miller', 'Grace Lee', 'Henry Davis', 'Ivy Chen', 'Jack Robinson'],\n", + " 'email': ['alice@email.com', 'bob@email.com', 'charlie@email.com', 'diana@email.com', 'eve@email.com',\n", + " 'frank@email.com', 'grace@email.com', 'henry@email.com', 'ivy@email.com', 'jack@email.com'],\n", + " 'age': [28, 35, 42, 31, 29, 45, 38, 33, 27, 41],\n", + " 'city': ['New York', 'Los Angeles', 'Chicago', 
'Houston', 'Phoenix',\n", + " 'Philadelphia', 'San Antonio', 'San Diego', 'Dallas', 'San Jose'],\n", + " 'signup_date': pd.date_range('2023-01-01', periods=10, freq='M')\n", + "}\n", + "\n", + "df_customers = pd.DataFrame(customers_data)\n", + "\n", + "# Orders dataset\n", + "orders_data = {\n", + " 'order_id': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112],\n", + " 'customer_id': [1, 2, 1, 3, 4, 2, 5, 1, 6, 11, 3, 2], # Note: customer_id 11 doesn't exist in customers\n", + " 'order_date': pd.date_range('2023-06-01', periods=12, freq='W'),\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Laptop', 'Monitor', 'Phone', \n", + " 'Headphones', 'Mouse', 'Keyboard', 'Laptop', 'Tablet', 'Monitor'],\n", + " 'quantity': [1, 2, 1, 1, 1, 1, 3, 2, 1, 1, 2, 1],\n", + " 'amount': [1200, 800, 400, 1200, 300, 800, 150, 50, 75, 1200, 800, 300]\n", + "}\n", + "\n", + "df_orders = pd.DataFrame(orders_data)\n", + "\n", + "# Product information dataset\n", + "products_data = {\n", + " 'product': ['Laptop', 'Phone', 'Tablet', 'Monitor', 'Headphones', 'Mouse', 'Keyboard', 'Webcam'],\n", + " 'category': ['Electronics', 'Electronics', 'Electronics', 'Electronics', \n", + " 'Audio', 'Accessories', 'Accessories', 'Electronics'],\n", + " 'price': [1200, 800, 400, 300, 150, 50, 75, 100],\n", + " 'supplier': ['TechCorp', 'MobileCorp', 'TechCorp', 'DisplayCorp', \n", + " 'AudioCorp', 'AccessoryCorp', 'AccessoryCorp', 'TechCorp']\n", + "}\n", + "\n", + "df_products = pd.DataFrame(products_data)\n", + "\n", + "# Customer segments dataset\n", + "segments_data = {\n", + " 'customer_id': [1, 2, 3, 4, 5, 6, 7, 8, 12, 13], # Some customers not in main customer table\n", + " 'segment': ['Premium', 'Standard', 'Premium', 'Standard', 'Basic', \n", + " 'Premium', 'Standard', 'Basic', 'Premium', 'Standard'],\n", + " 'loyalty_points': [1500, 800, 1200, 600, 200, 1800, 750, 300, 2000, 900]\n", + "}\n", + "\n", + "df_segments = pd.DataFrame(segments_data)\n", + "\n", + "print(\"Sample datasets 
created:\")\n", + "print(f\"Customers: {df_customers.shape}\")\n", + "print(f\"Orders: {df_orders.shape}\")\n", + "print(f\"Products: {df_products.shape}\")\n", + "print(f\"Segments: {df_segments.shape}\")\n", + "\n", + "print(\"\\nCustomers dataset:\")\n", + "print(df_customers.head())\n", + "\n", + "print(\"\\nOrders dataset:\")\n", + "print(df_orders.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Basic Merge Operations\n", + "\n", + "Understanding the fundamental merge operations and join types." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== INNER JOIN ===\n", + "Result shape: (11, 11)\n", + "Sample results:\n", + " customer_name order_id product amount\n", + "0 Alice Johnson 101 Laptop 1200\n", + "1 Alice Johnson 103 Tablet 400\n", + "2 Alice Johnson 108 Mouse 50\n", + "3 Bob Smith 102 Phone 800\n", + "4 Bob Smith 106 Phone 800\n", + "\n", + "Unique customers in result: 6\n", + "Total orders: 11\n", + "Customers with orders: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6)]\n" + ] + } + ], + "source": [ + "# Inner Join - only matching records\n", + "print(\"=== INNER JOIN ===\")\n", + "inner_join = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "print(f\"Result shape: {inner_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(inner_join[['customer_name', 'order_id', 'product', 'amount']].head())\n", + "\n", + "print(f\"\\nUnique customers in result: {inner_join['customer_id'].nunique()}\")\n", + "print(f\"Total orders: {len(inner_join)}\")\n", + "\n", + "# Check which customers have orders\n", + "customers_with_orders = inner_join['customer_id'].unique()\n", + "print(f\"Customers with orders: {sorted(customers_with_orders)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "=== LEFT JOIN ===\n", + "Result shape: (15, 11)\n", + "Sample results:\n", + " customer_name order_id product amount\n", + "0 Alice Johnson 101.0 Laptop 1200.0\n", + "1 Alice Johnson 103.0 Tablet 400.0\n", + "2 Alice Johnson 108.0 Mouse 50.0\n", + "3 Bob Smith 102.0 Phone 800.0\n", + "4 Bob Smith 106.0 Phone 800.0\n", + "5 Bob Smith 112.0 Monitor 300.0\n", + "6 Charlie Brown 104.0 Laptop 1200.0\n", + "7 Charlie Brown 111.0 Tablet 800.0\n", + "8 Diana Prince 105.0 Monitor 300.0\n", + "9 Eve Wilson 107.0 Headphones 150.0\n", + "\n", + "Customers without orders: ['Grace Lee', 'Henry Davis', 'Ivy Chen', 'Jack Robinson']\n", + "\n", + "Total records: 15\n", + "Records with orders: 11\n", + "Records without orders: 4\n" + ] + } + ], + "source": [ + "# Left Join - all records from left table\n", + "print(\"=== LEFT JOIN ===\")\n", + "left_join = pd.merge(df_customers, df_orders, on='customer_id', how='left')\n", + "print(f\"Result shape: {left_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(left_join[['customer_name', 'order_id', 'product', 'amount']].head(10))\n", + "\n", + "# Check customers without orders\n", + "customers_without_orders = left_join[left_join['order_id'].isnull()]['customer_name'].tolist()\n", + "print(f\"\\nCustomers without orders: {customers_without_orders}\")\n", + "\n", + "# Summary statistics\n", + "print(f\"\\nTotal records: {len(left_join)}\")\n", + "print(f\"Records with orders: {left_join['order_id'].notna().sum()}\")\n", + "print(f\"Records without orders: {left_join['order_id'].isnull().sum()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== RIGHT JOIN ===\n", + "Result shape: (12, 11)\n", + "Sample results:\n", + " customer_name order_id product amount\n", + "0 Alice Johnson 101 Laptop 1200\n", + "1 Bob Smith 102 Phone 800\n", + "2 Alice Johnson 103 Tablet 
400\n", + "3 Charlie Brown 104 Laptop 1200\n", + "4 Diana Prince 105 Monitor 300\n", + "\n", + "Orders without customer info: 1\n", + " customer_id order_id product amount\n", + "9 11 110 Laptop 1200\n" + ] + } + ], + "source": [ + "# Right Join - all records from right table\n", + "print(\"=== RIGHT JOIN ===\")\n", + "right_join = pd.merge(df_customers, df_orders, on='customer_id', how='right')\n", + "print(f\"Result shape: {right_join.shape}\")\n", + "print(\"Sample results:\")\n", + "print(right_join[['customer_name', 'order_id', 'product', 'amount']].head())\n", + "\n", + "# Check orders without customer information\n", + "orders_without_customers = right_join[right_join['customer_name'].isnull()]\n", + "print(f\"\\nOrders without customer info: {len(orders_without_customers)}\")\n", + "if len(orders_without_customers) > 0:\n", + " print(orders_without_customers[['customer_id', 'order_id', 'product', 'amount']])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== OUTER JOIN ===\n", + "Result shape: (16, 11)\n", + "\n", + "Data quality analysis:\n", + "Records with complete customer info: 15\n", + "Records with complete order info: 12\n", + "Records with both customer and order info: 11\n", + "\n", + "Customers without orders:\n", + " customer_name city\n", + "11 Grace Lee San Antonio\n", + "12 Henry Davis San Diego\n", + "13 Ivy Chen Dallas\n", + "14 Jack Robinson San Jose\n", + "\n", + "Orders without customer data:\n", + " customer_id order_id product amount\n", + "15 11 110.0 Laptop 1200.0\n" + ] + } + ], + "source": [ + "# Outer Join - all records from both tables\n", + "print(\"=== OUTER JOIN ===\")\n", + "outer_join = pd.merge(df_customers, df_orders, on='customer_id', how='outer')\n", + "print(f\"Result shape: {outer_join.shape}\")\n", + "\n", + "# Analyze the result\n", + "print(\"\\nData quality analysis:\")\n", + "print(f\"Records with 
complete customer info: {outer_join['customer_name'].notna().sum()}\")\n", + "print(f\"Records with complete order info: {outer_join['order_id'].notna().sum()}\")\n", + "print(f\"Records with both customer and order info: {(outer_join['customer_name'].notna() & outer_join['order_id'].notna()).sum()}\")\n", + "\n", + "# Show different categories of records\n", + "print(\"\\nCustomers without orders:\")\n", + "customers_only = outer_join[(outer_join['customer_name'].notna()) & (outer_join['order_id'].isnull())]\n", + "print(customers_only[['customer_name', 'city']].drop_duplicates())\n", + "\n", + "print(\"\\nOrders without customer data:\")\n", + "orders_only = outer_join[(outer_join['customer_name'].isnull()) & (outer_join['order_id'].notna())]\n", + "print(orders_only[['customer_id', 'order_id', 'product', 'amount']])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Multiple Table Joins\n", + "\n", + "Combining data from multiple sources in sequence." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== THREE-WAY JOIN ===\n", + "After joining customers and orders: (11, 11)\n", + "After joining with products: (11, 14)\n", + "\n", + "Complete order information:\n", + " customer_name order_id product category quantity amount price \\\n", + "0 Alice Johnson 101 Laptop Electronics 1 1200 1200 \n", + "1 Alice Johnson 103 Tablet Electronics 1 400 400 \n", + "2 Alice Johnson 108 Mouse Accessories 2 50 50 \n", + "3 Bob Smith 102 Phone Electronics 2 800 800 \n", + "4 Bob Smith 106 Phone Electronics 1 800 800 \n", + "\n", + " supplier \n", + "0 TechCorp \n", + "1 TechCorp \n", + "2 AccessoryCorp \n", + "3 MobileCorp \n", + "4 MobileCorp \n", + "\n", + "Data consistency check:\n", + "Order amounts match calculated amounts: False\n", + "\n", + "Mismatched records: 3\n", + " order_id product amount calculated_amount\n", + "2 108 Mouse 
50 100\n", + "3 102 Phone 800 1600\n", + "9 107 Headphones 150 450\n" + ] + } + ], + "source": [ + "# Three-way join: Customers + Orders + Products\n", + "print(\"=== THREE-WAY JOIN ===\")\n", + "\n", + "# Step 1: Join customers and orders\n", + "customer_orders = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "print(f\"After joining customers and orders: {customer_orders.shape}\")\n", + "\n", + "# Step 2: Join with products\n", + "complete_data = pd.merge(customer_orders, df_products, on='product', how='left')\n", + "print(f\"After joining with products: {complete_data.shape}\")\n", + "\n", + "# Display comprehensive view\n", + "print(\"\\nComplete order information:\")\n", + "display_cols = ['customer_name', 'order_id', 'product', 'category', 'quantity', 'amount', 'price', 'supplier']\n", + "print(complete_data[display_cols].head())\n", + "\n", + "# Verify data consistency\n", + "print(\"\\nData consistency check:\")\n", + "# Check if order amount matches product price * quantity\n", + "complete_data['calculated_amount'] = complete_data['price'] * complete_data['quantity']\n", + "amount_matches = (complete_data['amount'] == complete_data['calculated_amount']).all()\n", + "print(f\"Order amounts match calculated amounts: {amount_matches}\")\n", + "\n", + "if not amount_matches:\n", + " mismatched = complete_data[complete_data['amount'] != complete_data['calculated_amount']]\n", + " print(f\"\\nMismatched records: {len(mismatched)}\")\n", + " print(mismatched[['order_id', 'product', 'amount', 'calculated_amount']])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== ADDING CUSTOMER SEGMENTS ===\n", + "Customers with segments shape: (10, 8)\n", + "\n", + "Customers without segment info: 2\n", + " customer_name city\n", + "8 Ivy Chen Dallas\n", + "9 Jack Robinson San Jose\n", + "\n", + "Full customer profile shape: (11, 17)\n", + 
"\n", + "Revenue by customer segment:\n", + " Total_Revenue Avg_Order_Value Total_Orders Unique_Customers\n", + "segment \n", + "Basic 150 150.00 1 1\n", + "Premium 3725 620.83 6 3\n", + "Standard 2200 550.00 4 2\n" + ] + } + ], + "source": [ + "# Add customer segment information\n", + "print(\"=== ADDING CUSTOMER SEGMENTS ===\")\n", + "\n", + "# Join with segments (left join to keep all customers)\n", + "customers_with_segments = pd.merge(df_customers, df_segments, on='customer_id', how='left')\n", + "print(f\"Customers with segments shape: {customers_with_segments.shape}\")\n", + "\n", + "# Check which customers don't have segment information\n", + "missing_segments = customers_with_segments[customers_with_segments['segment'].isnull()]\n", + "print(f\"\\nCustomers without segment info: {len(missing_segments)}\")\n", + "if len(missing_segments) > 0:\n", + " print(missing_segments[['customer_name', 'city']])\n", + "\n", + "# Create comprehensive customer profile\n", + "full_customer_profile = pd.merge(complete_data, df_segments, on='customer_id', how='left')\n", + "print(f\"\\nFull customer profile shape: {full_customer_profile.shape}\")\n", + "\n", + "# Analyze by segment\n", + "segment_analysis = full_customer_profile.groupby('segment').agg({\n", + " 'amount': ['sum', 'mean', 'count'],\n", + " 'customer_id': 'nunique'\n", + "}).round(2)\n", + "segment_analysis.columns = ['Total_Revenue', 'Avg_Order_Value', 'Total_Orders', 'Unique_Customers']\n", + "print(\"\\nRevenue by customer segment:\")\n", + "print(segment_analysis)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Advanced Merge Techniques\n", + "\n", + "Handling complex merging scenarios and edge cases." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MERGE WITH DIFFERENT COLUMN NAMES ===\n", + "Merge with different column names:\n", + " customer_name customer_id cust_id income_range education\n", + "0 Alice Johnson 1 1.0 50-75k Bachelor\n", + "1 Bob Smith 2 2.0 75-100k Master\n", + "2 Charlie Brown 3 3.0 50-75k PhD\n", + "3 Diana Prince 4 4.0 100k+ Master\n", + "4 Eve Wilson 5 5.0 25-50k Bachelor\n", + "\n", + "After cleanup: (10, 9)\n" + ] + } + ], + "source": [ + "# Merge with different column names\n", + "print(\"=== MERGE WITH DIFFERENT COLUMN NAMES ===\")\n", + "\n", + "# Create a dataset with different column name\n", + "customer_demographics = pd.DataFrame({\n", + " 'cust_id': [1, 2, 3, 4, 5],\n", + " 'income_range': ['50-75k', '75-100k', '50-75k', '100k+', '25-50k'],\n", + " 'education': ['Bachelor', 'Master', 'PhD', 'Master', 'Bachelor'],\n", + " 'occupation': ['Engineer', 'Manager', 'Professor', 'Director', 'Analyst']\n", + "})\n", + "\n", + "# Merge using left_on and right_on parameters\n", + "customers_with_demographics = pd.merge(\n", + " df_customers, \n", + " customer_demographics, \n", + " left_on='customer_id', \n", + " right_on='cust_id', \n", + " how='left'\n", + ")\n", + "\n", + "print(\"Merge with different column names:\")\n", + "print(customers_with_demographics[['customer_name', 'customer_id', 'cust_id', 'income_range', 'education']].head())\n", + "\n", + "# Clean up duplicate columns\n", + "customers_with_demographics = customers_with_demographics.drop('cust_id', axis=1)\n", + "print(f\"\\nAfter cleanup: {customers_with_demographics.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MERGE ON MULTIPLE COLUMNS ===\n", + "Orders with time-based pricing:\n", + " order_id product order_date amount price 
promotion\n", + "0 101 Laptop 2023-06-04 1200 1200.0 False\n", + "1 102 Phone 2023-06-11 800 800.0 False\n", + "2 103 Tablet 2023-06-18 400 400.0 False\n", + "3 104 Laptop 2023-06-25 1200 1200.0 False\n", + "4 105 Monitor 2023-07-02 300 NaN NaN\n", + "\n", + "Orders with pricing discrepancies: 3\n" + ] + } + ], + "source": [ + "# Merge on multiple columns\n", + "print(\"=== MERGE ON MULTIPLE COLUMNS ===\")\n", + "\n", + "# Create time-based pricing data\n", + "pricing_data = pd.DataFrame({\n", + " 'product': ['Laptop', 'Laptop', 'Phone', 'Phone', 'Tablet', 'Tablet'],\n", + " 'date': pd.to_datetime(['2023-06-01', '2023-08-01', '2023-06-01', '2023-08-01', '2023-06-01', '2023-08-01']),\n", + " 'price': [1200, 1100, 800, 750, 400, 380],\n", + " 'promotion': [False, True, False, True, False, True]\n", + "})\n", + "\n", + "# Add year-month to orders for matching\n", + "df_orders_with_period = df_orders.copy()\n", + "df_orders_with_period['order_month'] = df_orders_with_period['order_date'].dt.to_period('M').dt.start_time\n", + "\n", + "# Create matching periods in pricing data\n", + "pricing_data['period'] = pricing_data['date'].dt.to_period('M').dt.start_time\n", + "\n", + "# Merge on product and time period\n", + "orders_with_pricing = pd.merge(\n", + " df_orders_with_period,\n", + " pricing_data,\n", + " left_on=['product', 'order_month'],\n", + " right_on=['product', 'period'],\n", + " how='left'\n", + ")\n", + "\n", + "print(\"Orders with time-based pricing:\")\n", + "print(orders_with_pricing[['order_id', 'product', 'order_date', 'amount', 'price', 'promotion']].head())\n", + "\n", + "# Check for pricing discrepancies\n", + "pricing_discrepancies = orders_with_pricing[\n", + " (orders_with_pricing['amount'] != orders_with_pricing['price'] * orders_with_pricing['quantity']) &\n", + " orders_with_pricing['price'].notna()\n", + "]\n", + "print(f\"\\nOrders with pricing discrepancies: {len(pricing_discrepancies)}\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== HANDLING DUPLICATE KEYS ===\n", + "Customer contacts with duplicates:\n", + " customer_id contact_type contact_value is_primary\n", + "0 1 email alice@email.com True\n", + "1 1 phone 555-0101 False\n", + "2 2 email bob@email.com True\n", + "3 2 phone 555-0102 True\n", + "4 3 email charlie@email.com True\n", + "\n", + "Result of merge with duplicates: (5, 9)\n", + " customer_name contact_type contact_value is_primary\n", + "0 Alice Johnson email alice@email.com True\n", + "1 Alice Johnson phone 555-0101 False\n", + "2 Bob Smith email bob@email.com True\n", + "3 Bob Smith phone 555-0102 True\n", + "4 Charlie Brown email charlie@email.com True\n", + "\n", + "After filtering to primary contacts: (11, 9)\n", + "\n", + "Pivoted contacts:\n", + "contact_type customer_id email phone\n", + "0 1 alice@email.com 555-0101\n", + "1 2 bob@email.com 555-0102\n", + "2 3 charlie@email.com NaN\n", + "\n", + "After merging pivoted contacts: (10, 8)\n" + ] + } + ], + "source": [ + "# Handling duplicate keys in merge\n", + "print(\"=== HANDLING DUPLICATE KEYS ===\")\n", + "\n", + "# Create data with duplicate keys\n", + "customer_contacts = pd.DataFrame({\n", + " 'customer_id': [1, 1, 2, 2, 3],\n", + " 'contact_type': ['email', 'phone', 'email', 'phone', 'email'],\n", + " 'contact_value': ['alice@email.com', '555-0101', 'bob@email.com', '555-0102', 'charlie@email.com'],\n", + " 'is_primary': [True, False, True, True, True]\n", + "})\n", + "\n", + "print(\"Customer contacts with duplicates:\")\n", + "print(customer_contacts)\n", + "\n", + "# Merge will create cartesian product for duplicate keys\n", + "customers_with_contacts = pd.merge(df_customers, customer_contacts, on='customer_id', how='inner')\n", + "print(f\"\\nResult of merge with duplicates: {customers_with_contacts.shape}\")\n", + "print(customers_with_contacts[['customer_name', 'contact_type', 
'contact_value', 'is_primary']].head())\n", + "\n", + "# Strategy 1: Filter before merge\n", + "primary_contacts = customer_contacts[customer_contacts['is_primary'] == True]\n", + "customers_primary_contacts = pd.merge(df_customers, primary_contacts, on='customer_id', how='left')\n", + "print(f\"\\nAfter filtering to primary contacts: {customers_primary_contacts.shape}\")\n", + "\n", + "# Strategy 2: Pivot contacts to columns\n", + "contacts_pivoted = customer_contacts.pivot_table(\n", + " index='customer_id',\n", + " columns='contact_type',\n", + " values='contact_value',\n", + " aggfunc='first'\n", + ").reset_index()\n", + "print(\"\\nPivoted contacts:\")\n", + "print(contacts_pivoted)\n", + "\n", + "customers_with_pivoted_contacts = pd.merge(df_customers, contacts_pivoted, on='customer_id', how='left')\n", + "print(f\"\\nAfter merging pivoted contacts: {customers_with_pivoted_contacts.shape}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Index-based Joins\n", + "\n", + "Using DataFrame indices for joining operations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== INDEX-BASED JOINS ===\n", + "Customers with index:\n", + " customer_name email age city signup_date\n", + "customer_id \n", + "1 Alice Johnson alice@email.com 28 New York 2023-01-31\n", + "2 Bob Smith bob@email.com 35 Los Angeles 2023-02-28\n", + "3 Charlie Brown charlie@email.com 42 Chicago 2023-03-31\n", + "4 Diana Prince diana@email.com 31 Houston 2023-04-30\n", + "5 Eve Wilson eve@email.com 29 Phoenix 2023-05-31\n", + "\n", + "Joined by index shape: (10, 7)\n", + " customer_name city segment loyalty_points\n", + "customer_id \n", + "1 Alice Johnson New York Premium 1500.0\n", + "2 Bob Smith Los Angeles Standard 800.0\n", + "3 Charlie Brown Chicago Premium 1200.0\n", + "4 Diana Prince Houston Standard 600.0\n", + "5 Eve Wilson Phoenix Basic 200.0\n", + "\n", + "Equivalent merge shape: (10, 8)\n", + "Results are identical: True\n" + ] + } + ], + "source": [ + "# Set up DataFrames with indices\n", + "print(\"=== INDEX-BASED JOINS ===\")\n", + "\n", + "# Set customer_id as index\n", + "customers_indexed = df_customers.set_index('customer_id')\n", + "segments_indexed = df_segments.set_index('customer_id')\n", + "\n", + "print(\"Customers with index:\")\n", + "print(customers_indexed.head())\n", + "\n", + "# Join using indices\n", + "joined_by_index = customers_indexed.join(segments_indexed, how='left')\n", + "print(f\"\\nJoined by index shape: {joined_by_index.shape}\")\n", + "print(joined_by_index[['customer_name', 'city', 'segment', 'loyalty_points']].head())\n", + "\n", + "# Compare with merge\n", + "merged_equivalent = pd.merge(df_customers, df_segments, on='customer_id', how='left')\n", + "print(f\"\\nEquivalent merge shape: {merged_equivalent.shape}\")\n", + "\n", + "# Verify they're the same (after sorting)\n", + "joined_sorted = joined_by_index.reset_index().sort_values('customer_id')\n", + 
"merged_sorted = merged_equivalent.sort_values('customer_id')\n", + "are_equal = joined_sorted.equals(merged_sorted)\n", + "print(f\"Results are identical: {are_equal}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MULTI-INDEX JOINS ===\n", + "Sales targets with multi-index:\n", + " sales_target commission_rate\n", + "region product \n", + "North Laptop 10 0.05\n", + " Phone 15 0.04\n", + "South Laptop 8 0.06\n", + " Phone 12 0.05\n", + "East Laptop 12 0.05\n", + " Phone 18 0.04\n", + "\n", + "Orders with sales targets:\n", + " order_id region product amount sales_target commission_rate\n", + "0 101 North Laptop 1200 10.0 0.05\n", + "1 102 South Phone 800 12.0 0.05\n", + "2 103 North Tablet 400 NaN NaN\n", + "3 104 East Laptop 1200 12.0 0.05\n", + "4 105 North Monitor 300 NaN NaN\n" + ] + } + ], + "source": [ + "# Multi-index joins\n", + "print(\"=== MULTI-INDEX JOINS ===\")\n", + "\n", + "# Create a dataset with multiple index levels\n", + "sales_by_region_product = pd.DataFrame({\n", + " 'region': ['North', 'North', 'South', 'South', 'East', 'East'],\n", + " 'product': ['Laptop', 'Phone', 'Laptop', 'Phone', 'Laptop', 'Phone'],\n", + " 'sales_target': [10, 15, 8, 12, 12, 18],\n", + " 'commission_rate': [0.05, 0.04, 0.06, 0.05, 0.05, 0.04]\n", + "})\n", + "\n", + "# Set multi-index\n", + "sales_targets = sales_by_region_product.set_index(['region', 'product'])\n", + "print(\"Sales targets with multi-index:\")\n", + "print(sales_targets)\n", + "\n", + "# Create customer orders with region mapping\n", + "customer_regions = {\n", + " 1: 'North', 2: 'South', 3: 'East', 4: 'North', 5: 'South', 6: 'East'\n", + "}\n", + "\n", + "orders_with_region = df_orders.copy()\n", + "orders_with_region['region'] = orders_with_region['customer_id'].map(customer_regions)\n", + "orders_with_region = orders_with_region.dropna(subset=['region'])\n", + "\n", + "# Merge 
on multiple columns to match multi-index\n", + "orders_with_targets = pd.merge(\n", + " orders_with_region,\n", + " sales_targets.reset_index(),\n", + " on=['region', 'product'],\n", + " how='left'\n", + ")\n", + "\n", + "print(\"\\nOrders with sales targets:\")\n", + "print(orders_with_targets[['order_id', 'region', 'product', 'amount', 'sales_target', 'commission_rate']].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Concatenation Operations\n", + "\n", + "Combining DataFrames vertically and horizontally." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== VERTICAL CONCATENATION ===\n", + "Original customers: 10\n", + "New customers: 5\n", + "Combined customers: 15\n", + "\n", + "Combined customer data:\n", + " customer_id customer_name email age city signup_date\n", + "10 11 Kate Wilson kate@email.com 26 Austin 2024-01-31\n", + "11 12 Liam Brown liam@email.com 39 Seattle 2024-02-29\n", + "12 13 Mia Garcia mia@email.com 31 Denver 2024-03-31\n", + "13 14 Noah Jones noah@email.com 44 Boston 2024-04-30\n", + "14 15 Olivia Miller olivia@email.com 28 Miami 2024-05-31\n", + "\n", + "After adding customers with extra info: (17, 7)\n", + "Missing values in referral_source:\n", + "15\n" + ] + } + ], + "source": [ + "# Vertical concatenation (stacking DataFrames)\n", + "print(\"=== VERTICAL CONCATENATION ===\")\n", + "\n", + "# Create additional customer data (new batch)\n", + "new_customers = pd.DataFrame({\n", + " 'customer_id': [11, 12, 13, 14, 15],\n", + " 'customer_name': ['Kate Wilson', 'Liam Brown', 'Mia Garcia', 'Noah Jones', 'Olivia Miller'],\n", + " 'email': ['kate@email.com', 'liam@email.com', 'mia@email.com', 'noah@email.com', 'olivia@email.com'],\n", + " 'age': [26, 39, 31, 44, 28],\n", + " 'city': ['Austin', 'Seattle', 'Denver', 'Boston', 'Miami'],\n", + " 'signup_date': pd.date_range('2024-01-01', 
periods=5, freq='M')\n", + "})\n", + "\n", + "# Concatenate vertically\n", + "all_customers = pd.concat([df_customers, new_customers], ignore_index=True)\n", + "print(f\"Original customers: {len(df_customers)}\")\n", + "print(f\"New customers: {len(new_customers)}\")\n", + "print(f\"Combined customers: {len(all_customers)}\")\n", + "\n", + "print(\"\\nCombined customer data:\")\n", + "print(all_customers.tail())\n", + "\n", + "# Concatenation with different columns\n", + "customers_with_extra_info = pd.DataFrame({\n", + " 'customer_id': [16, 17],\n", + " 'customer_name': ['Paul Davis', 'Quinn Taylor'],\n", + " 'email': ['paul@email.com', 'quinn@email.com'],\n", + " 'age': [35, 29],\n", + " 'city': ['Portland', 'Nashville'],\n", + " 'signup_date': pd.date_range('2024-06-01', periods=2, freq='M'),\n", + " 'referral_source': ['Google', 'Facebook'] # Extra column\n", + "})\n", + "\n", + "# Concat with different columns (creates NaN for missing columns)\n", + "all_customers_extended = pd.concat([all_customers, customers_with_extra_info], ignore_index=True, sort=False)\n", + "print(f\"\\nAfter adding customers with extra info: {all_customers_extended.shape}\")\n", + "print(\"Missing values in referral_source:\")\n", + "print(all_customers_extended['referral_source'].isnull().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== HORIZONTAL CONCATENATION ===\n", + "Customer basic info:\n", + " customer_id customer_name email\n", + "0 1 Alice Johnson alice@email.com\n", + "1 2 Bob Smith bob@email.com\n", + "2 3 Charlie Brown charlie@email.com\n", + "3 4 Diana Prince diana@email.com\n", + "4 5 Eve Wilson eve@email.com\n", + "\n", + "Customer demographics:\n", + " customer_id age city signup_date\n", + "0 1 28 New York 2023-01-31\n", + "1 2 35 Los Angeles 2023-02-28\n", + "2 3 42 Chicago 2023-03-31\n", + "3 4 31 Houston 2023-04-30\n", + "4 5 29 Phoenix 
2023-05-31\n", + "\n", + "Recombined shape: (10, 6)\n", + " customer_id customer_name email age city signup_date\n", + "0 1 Alice Johnson alice@email.com 28 New York 2023-01-31\n", + "1 2 Bob Smith bob@email.com 35 Los Angeles 2023-02-28\n", + "2 3 Charlie Brown charlie@email.com 42 Chicago 2023-03-31\n", + "3 4 Diana Prince diana@email.com 31 Houston 2023-04-30\n", + "4 5 Eve Wilson eve@email.com 29 Phoenix 2023-05-31\n", + "\n", + "Columns match original: True\n" + ] + } + ], + "source": [ + "# Horizontal concatenation\n", + "print(\"=== HORIZONTAL CONCATENATION ===\")\n", + "\n", + "# Split customer data into parts\n", + "customer_basic_info = df_customers[['customer_id', 'customer_name', 'email']]\n", + "customer_demographics = df_customers[['customer_id', 'age', 'city', 'signup_date']]\n", + "\n", + "print(\"Customer basic info:\")\n", + "print(customer_basic_info.head())\n", + "\n", + "print(\"\\nCustomer demographics:\")\n", + "print(customer_demographics.head())\n", + "\n", + "# Concatenate horizontally (by index)\n", + "customers_recombined = pd.concat([customer_basic_info, customer_demographics.drop('customer_id', axis=1)], axis=1)\n", + "print(f\"\\nRecombined shape: {customers_recombined.shape}\")\n", + "print(customers_recombined.head())\n", + "\n", + "# Verify it matches original\n", + "columns_match = set(customers_recombined.columns) == set(df_customers.columns)\n", + "print(f\"\\nColumns match original: {columns_match}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== CONCAT WITH KEYS ===\n", + "Quarterly sales with hierarchical index:\n", + " product units_sold revenue\n", + "Q1 0 Laptop 50 60000\n", + " 1 Phone 75 60000\n", + " 2 Tablet 30 12000\n", + "Q2 0 Laptop 45 54000\n", + " 1 Phone 80 64000\n", + " 2 Tablet 35 14000\n", + "\n", + "Q1 sales only:\n", + " product units_sold revenue\n", + "0 Laptop 50 60000\n", + "1 Phone 75 
60000\n", + "2 Tablet 30 12000\n", + "\n", + "Quarterly comparison (side by side):\n", + " Q1 Q2 \n", + " units_sold revenue units_sold revenue\n", + "product \n", + "Laptop 50 60000 45 54000\n", + "Phone 75 60000 80 64000\n", + "Tablet 30 12000 35 14000\n" + ] + } + ], + "source": [ + "# Concat with keys (creating hierarchical columns)\n", + "print(\"=== CONCAT WITH KEYS ===\")\n", + "\n", + "# Create quarterly sales data\n", + "q1_sales = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet'],\n", + " 'units_sold': [50, 75, 30],\n", + " 'revenue': [60000, 60000, 12000]\n", + "})\n", + "\n", + "q2_sales = pd.DataFrame({\n", + " 'product': ['Laptop', 'Phone', 'Tablet'],\n", + " 'units_sold': [45, 80, 35],\n", + " 'revenue': [54000, 64000, 14000]\n", + "})\n", + "\n", + "# Concatenate with keys\n", + "quarterly_sales = pd.concat([q1_sales, q2_sales], keys=['Q1', 'Q2'])\n", + "print(\"Quarterly sales with hierarchical index:\")\n", + "print(quarterly_sales)\n", + "\n", + "# Access specific quarter\n", + "print(\"\\nQ1 sales only:\")\n", + "print(quarterly_sales.loc['Q1'])\n", + "\n", + "# Create summary comparison\n", + "quarterly_comparison = pd.concat([q1_sales.set_index('product'), q2_sales.set_index('product')], \n", + " keys=['Q1', 'Q2'], axis=1)\n", + "print(\"\\nQuarterly comparison (side by side):\")\n", + "print(quarterly_comparison)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Performance and Best Practices\n", + "\n", + "Optimizing merge operations and avoiding common pitfalls." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== PERFORMANCE COMPARISON ===\n", + "Large customers: (10000, 3)\n", + "Large orders: (50000, 3)\n", + "\n", + "Merge time: 0.0077 seconds\n", + "Join time: 0.0092 seconds\n", + "Join is 0.83x faster\n", + "\n", + "Results shape - Merge: (50000, 5), Join: (50000, 4)\n" + ] + } + ], + "source": [ + "# Performance comparison: merge vs join\n", + "import time\n", + "\n", + "print(\"=== PERFORMANCE COMPARISON ===\")\n", + "\n", + "# Create larger datasets for performance testing\n", + "np.random.seed(42)\n", + "large_customers = pd.DataFrame({\n", + " 'customer_id': range(1, 10001),\n", + " 'customer_name': [f'Customer_{i}' for i in range(1, 10001)],\n", + " 'city': np.random.choice(['New York', 'Los Angeles', 'Chicago'], 10000)\n", + "})\n", + "\n", + "large_orders = pd.DataFrame({\n", + " 'order_id': range(1, 50001),\n", + " 'customer_id': np.random.randint(1, 10001, 50000),\n", + " 'amount': np.random.normal(100, 30, 50000)\n", + "})\n", + "\n", + "print(f\"Large customers: {large_customers.shape}\")\n", + "print(f\"Large orders: {large_orders.shape}\")\n", + "\n", + "# Test merge performance\n", + "start_time = time.time()\n", + "merged_result = pd.merge(large_customers, large_orders, on='customer_id', how='inner')\n", + "merge_time = time.time() - start_time\n", + "\n", + "# Test join performance\n", + "customers_indexed = large_customers.set_index('customer_id')\n", + "orders_indexed = large_orders.set_index('customer_id')\n", + "\n", + "start_time = time.time()\n", + "joined_result = customers_indexed.join(orders_indexed, how='inner')\n", + "join_time = time.time() - start_time\n", + "\n", + "print(f\"\\nMerge time: {merge_time:.4f} seconds\")\n", + "print(f\"Join time: {join_time:.4f} seconds\")\n", + "print(f\"Join is {merge_time/join_time:.2f}x faster\")\n", + "\n", + "print(f\"\\nResults shape - 
Merge: {merged_result.shape}, Join: {joined_result.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== BEST PRACTICES ===\n", + "\n", + "--- Analyzing merge on 'customer_id' ---\n", + "Duplicates in left table: 0\n", + "Duplicates in right table: 5\n", + "Missing values in left table: 0\n", + "Missing values in right table: 0\n", + "Keys in both tables: 6\n", + "Keys only in left: 4\n", + "Keys only in right: 1\n", + "Warning: Duplicates present, result size may be larger than expected\n", + "\n", + "Predicted result sizes:\n", + "Inner join: Cannot predict (duplicates present)\n", + "Left join: Cannot predict (duplicates present)\n", + "Right join: Cannot predict (duplicates present)\n", + "Outer join: Cannot predict (duplicates present)\n", + "\n", + "--- Analyzing merge on 'customer_id' ---\n", + "Duplicates in left table: 0\n", + "Duplicates in right table: 0\n", + "Missing values in left table: 0\n", + "Missing values in right table: 0\n", + "Keys in both tables: 8\n", + "Keys only in left: 2\n", + "Keys only in right: 2\n", + "\n", + "Predicted result sizes:\n", + "Inner join: 8\n", + "Left join: 10\n", + "Right join: 10\n", + "Outer join: 12\n" + ] + } + ], + "source": [ + "# Best practices and common pitfalls\n", + "print(\"=== BEST PRACTICES ===\")\n", + "\n", + "def analyze_merge_keys(df1, df2, key_col):\n", + " \"\"\"Analyze merge keys before joining\"\"\"\n", + " print(f\"\\n--- Analyzing merge on '{key_col}' ---\")\n", + " \n", + " # Check for duplicates\n", + " df1_dups = df1[key_col].duplicated().sum()\n", + " df2_dups = df2[key_col].duplicated().sum()\n", + " \n", + " print(f\"Duplicates in left table: {df1_dups}\")\n", + " print(f\"Duplicates in right table: {df2_dups}\")\n", + " \n", + " # Check for missing values\n", + " df1_missing = df1[key_col].isnull().sum()\n", + " df2_missing = df2[key_col].isnull().sum()\n", + " \n", + 
" print(f\"Missing values in left table: {df1_missing}\")\n", + " print(f\"Missing values in right table: {df2_missing}\")\n", + " \n", + " # Check overlap\n", + " left_keys = set(df1[key_col].dropna())\n", + " right_keys = set(df2[key_col].dropna())\n", + " \n", + " overlap = left_keys & right_keys\n", + " left_only = left_keys - right_keys\n", + " right_only = right_keys - left_keys\n", + " \n", + " print(f\"Keys in both tables: {len(overlap)}\")\n", + " print(f\"Keys only in left: {len(left_only)}\")\n", + " print(f\"Keys only in right: {len(right_only)}\")\n", + " \n", + " # Predict result sizes\n", + " if df1_dups == 0 and df2_dups == 0:\n", + " inner_size = len(overlap)\n", + " left_size = len(df1)\n", + " right_size = len(df2)\n", + " outer_size = len(left_keys | right_keys)\n", + " else:\n", + " print(\"Warning: Duplicates present, result size may be larger than expected\")\n", + " inner_size = \"Cannot predict (duplicates present)\"\n", + " left_size = \"Cannot predict (duplicates present)\"\n", + " right_size = \"Cannot predict (duplicates present)\"\n", + " outer_size = \"Cannot predict (duplicates present)\"\n", + " \n", + " print(f\"\\nPredicted result sizes:\")\n", + " print(f\"Inner join: {inner_size}\")\n", + " print(f\"Left join: {left_size}\")\n", + " print(f\"Right join: {right_size}\")\n", + " print(f\"Outer join: {outer_size}\")\n", + "\n", + "# Analyze our sample data\n", + "analyze_merge_keys(df_customers, df_orders, 'customer_id')\n", + "analyze_merge_keys(df_customers, df_segments, 'customer_id')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== MERGE VALIDATION ===\n", + "Result shape: (11, 11)\n", + "⚠️ Found 5 duplicate keys after merge\n", + "\n", + "Data types:\n", + "customer_id int64\n", + "customer_name object\n", + "email object\n", + "age int64\n", + "city object\n", + "signup_date datetime64[ns]\n", + 
"order_id int64\n", + "order_date datetime64[ns]\n", + "product object\n", + "quantity int64\n", + "amount int64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Data validation after merge\n", + "def validate_merge_result(df, expected_rows=None, key_col=None):\n", + " \"\"\"Validate merge results\"\"\"\n", + " print(\"\\n=== MERGE VALIDATION ===\")\n", + " \n", + " print(f\"Result shape: {df.shape}\")\n", + " \n", + " if expected_rows:\n", + " print(f\"Expected rows: {expected_rows}\")\n", + " if len(df) != expected_rows:\n", + " print(\"⚠️ Row count doesn't match expectation!\")\n", + " \n", + " # Check for unexpected duplicates\n", + " if key_col and key_col in df.columns:\n", + " duplicates = df[key_col].duplicated().sum()\n", + " if duplicates > 0:\n", + " print(f\"⚠️ Found {duplicates} duplicate keys after merge\")\n", + " \n", + " # Check for missing values in key columns\n", + " missing_summary = df.isnull().sum()\n", + " critical_missing = missing_summary[missing_summary > 0]\n", + " \n", + " if len(critical_missing) > 0:\n", + " print(\"Missing values after merge:\")\n", + " print(critical_missing)\n", + " \n", + " # Data type consistency\n", + " print(f\"\\nData types:\")\n", + " print(df.dtypes)\n", + " \n", + " return df\n", + "\n", + "# Example validation\n", + "sample_merge = pd.merge(df_customers, df_orders, on='customer_id', how='inner')\n", + "validated_result = validate_merge_result(sample_merge, key_col='customer_id')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply merging and joining techniques to real-world scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Customer Lifetime Value Analysis\n", + "# Create a comprehensive customer analysis by joining:\n", + "# - Customer demographics\n", + "# - Order history\n", + "# - Product information\n", + "# - Customer segments\n", + 
"# Calculate CLV metrics for each customer\n", + "\n", + "def calculate_customer_lifetime_value(customers, orders, products, segments):\n", + " \"\"\"Calculate comprehensive customer lifetime value metrics\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# clv_analysis = calculate_customer_lifetime_value(df_customers, df_orders, df_products, df_segments)\n", + "# print(\"Customer Lifetime Value Analysis:\")\n", + "# print(clv_analysis.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Data Quality Assessment\n", + "# Create a function that analyzes data quality issues when merging multiple datasets:\n", + "# - Identify orphaned records\n", + "# - Find data inconsistencies\n", + "# - Suggest data cleaning steps\n", + "# - Provide merge recommendations\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Time-series Join Challenge\n", + "# Create a complex time-based join scenario:\n", + "# - Join orders with time-varying product prices\n", + "# - Handle seasonal promotions\n", + "# - Calculate accurate historical revenue\n", + "# - Account for price changes over time\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Join Types**:\n", + " - **Inner**: Only matching records from both tables\n", + " - **Left**: All records from left table + matching from right\n", + " - **Right**: All records from right table + matching from left\n", + " - **Outer**: All records from both tables\n", + "\n", + "2. **Method Selection**:\n", + " - **`pd.merge()`**: Most flexible, works with any columns\n", + " - **`.join()`**: Convenient shorthand for index-based joins (not necessarily faster than `pd.merge()`; benchmark on your data)\n", + " - **`pd.concat()`**: For stacking DataFrames vertically/horizontally\n", + "\n", + "3. 
**Best Practices**:\n", + " - Always analyze merge keys before joining\n", + " - Check for duplicates and missing values\n", + " - Validate results after merging\n", + " - Use appropriate join types for your use case\n", + " - Consider performance implications for large datasets\n", + "\n", + "4. **Common Pitfalls**:\n", + " - Cartesian products from duplicate keys\n", + " - Unexpected result sizes\n", + " - Data type inconsistencies\n", + " - Missing value propagation\n", + "\n", + "## Join Type Selection Guide\n", + "\n", + "| Use Case | Recommended Join | Rationale |\n", + "|----------|-----------------|----------|\n", + "| Customer orders analysis | Inner | Only customers with orders |\n", + "| Customer segmentation | Left | Keep all customers, add segment info |\n", + "| Order validation | Right | Keep all orders, check customer validity |\n", + "| Data completeness analysis | Outer | See all records and identify gaps |\n", + "| Performance-critical operations | Index-based join | Can be faster when indexes are already set (benchmark to confirm) |\n", + "\n", + "## Performance Tips\n", + "\n", + "1. **Index Usage**: Set indexes for frequently joined columns\n", + "2. **Data Types**: Ensure consistent data types before joining\n", + "3. **Memory Management**: Consider chunking for very large datasets\n", + "4. **Join Order**: Start with smallest datasets\n", + "5. 
**Validation**: Always validate merge results" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/09_pivot_tables.ipynb b/Session_01/PandasDataFrame-exmples/09_pivot_tables.ipynb new file mode 100755 index 0000000..3546b1e --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/09_pivot_tables.ipynb @@ -0,0 +1,1978 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 9: Pivot Tables and Data Reshaping\n", + "\n", + "## Learning Objectives\n", + "- Master pivot table creation and customization\n", + "- Understand data reshaping with melt, pivot, stack, and unstack\n", + "- Learn cross-tabulation and contingency tables\n", + "- Practice with multi-level indexing and hierarchical data\n", + "- Apply reshaping techniques to real-world analysis scenarios\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-8\n", + "- Understanding of aggregation and groupby operations\n", + "- Familiarity with Excel pivot tables (helpful but not required)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Libraries loaded successfully!\n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "from datetime import datetime, timedelta\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display options\n", + 
"pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 20)\n", + "pd.set_option('display.width', None)\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Sample Dataset\n", + "\n", + "Let's create a comprehensive business dataset for pivot table examples." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Business dataset created:\n", + "Shape: (1000, 20)\n", + "\n", + "First few rows:\n", + " date salesperson region product_category product customer_type \\\n", + "0 2024-01-01 Grace Lee North Clothing Shoes Returning \n", + "1 2024-01-02 Diana Prince East Clothing Magazine Returning \n", + "2 2024-01-03 Henry Davis West Books Shoes VIP \n", + "3 2024-01-04 Eve Wilson West Clothing Novel New \n", + "4 2024-01-05 Grace Lee East Clothing Laptop VIP \n", + "\n", + " sales_channel quantity unit_price discount_percent shipping_cost \\\n", + "0 Online 1 72.81 0 16.13 \n", + "1 Store 9 99.98 15 10.42 \n", + "2 Online 6 4.61 0 8.73 \n", + "3 Phone 8 31.51 0 8.27 \n", + "4 Store 3 86.28 15 11.61 \n", + "\n", + " gross_sales discount_amount net_sales total_order year month quarter \\\n", + "0 72.81 0.000 72.810 88.940 2024 1 1 \n", + "1 899.82 134.973 764.847 775.267 2024 1 1 \n", + "2 27.66 0.000 27.660 36.390 2024 1 1 \n", + "3 252.08 0.000 252.080 260.350 2024 1 1 \n", + "4 258.84 38.826 220.014 231.624 2024 1 1 \n", + "\n", + " day_of_week month_name \n", + "0 Monday January \n", + "1 Tuesday January \n", + "2 Wednesday January \n", + "3 Thursday January \n", + "4 Friday January \n", + "\n", + "Column info:\n", + "date datetime64[ns]\n", + "salesperson object\n", + "region object\n", + "product_category object\n", + "product object\n", + "customer_type object\n", + "sales_channel object\n", + "quantity int64\n", + "unit_price float64\n", + 
"discount_percent int64\n", + "shipping_cost float64\n", + "gross_sales float64\n", + "discount_amount float64\n", + "net_sales float64\n", + "total_order float64\n", + "year int32\n", + "month int32\n", + "quarter int32\n", + "day_of_week object\n", + "month_name object\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Create comprehensive business dataset\n", + "np.random.seed(42)\n", + "n_records = 1000\n", + "\n", + "# Generate realistic business data\n", + "business_data = {\n", + " 'date': pd.date_range('2024-01-01', periods=n_records, freq='D'),\n", + " 'salesperson': np.random.choice([\n", + " 'Alice Johnson', 'Bob Smith', 'Charlie Brown', 'Diana Prince', 'Eve Wilson',\n", + " 'Frank Miller', 'Grace Lee', 'Henry Davis', 'Ivy Chen', 'Jack Robinson'\n", + " ], n_records),\n", + " 'region': np.random.choice(['North', 'South', 'East', 'West'], n_records),\n", + " 'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Home & Garden'], n_records),\n", + " 'product': np.random.choice([\n", + " 'Laptop', 'Phone', 'Tablet', 'Headphones', 'Speaker',\n", + " 'Shirt', 'Pants', 'Shoes', 'Jacket', 'Hat',\n", + " 'Novel', 'Textbook', 'Magazine', 'Comic', 'Cookbook',\n", + " 'Plant', 'Tool', 'Furniture', 'Decoration', 'Garden'\n", + " ], n_records),\n", + " 'customer_type': np.random.choice(['New', 'Returning', 'VIP'], n_records, p=[0.3, 0.5, 0.2]),\n", + " 'sales_channel': np.random.choice(['Online', 'Store', 'Phone'], n_records, p=[0.6, 0.3, 0.1]),\n", + " 'quantity': np.random.randint(1, 10, n_records),\n", + " 'unit_price': np.random.normal(50, 20, n_records),\n", + " 'discount_percent': np.random.choice([0, 5, 10, 15, 20], n_records, p=[0.5, 0.2, 0.2, 0.08, 0.02]),\n", + " 'shipping_cost': np.random.normal(8, 3, n_records)\n", + "}\n", + "\n", + "df_business = pd.DataFrame(business_data)\n", + "\n", + "# Clean and calculate derived fields\n", + "df_business['unit_price'] = np.abs(df_business['unit_price']).round(2)\n", + 
"df_business['shipping_cost'] = np.abs(df_business['shipping_cost']).round(2)\n", + "df_business['gross_sales'] = df_business['quantity'] * df_business['unit_price']\n", + "df_business['discount_amount'] = df_business['gross_sales'] * df_business['discount_percent'] / 100\n", + "df_business['net_sales'] = df_business['gross_sales'] - df_business['discount_amount']\n", + "df_business['total_order'] = df_business['net_sales'] + df_business['shipping_cost']\n", + "\n", + "# Add time-based columns\n", + "df_business['year'] = df_business['date'].dt.year\n", + "df_business['month'] = df_business['date'].dt.month\n", + "df_business['quarter'] = df_business['date'].dt.quarter\n", + "df_business['day_of_week'] = df_business['date'].dt.day_name()\n", + "df_business['month_name'] = df_business['date'].dt.month_name()\n", + "\n", + "print(\"Business dataset created:\")\n", + "print(f\"Shape: {df_business.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_business.head())\n", + "print(\"\\nColumn info:\")\n", + "print(df_business.dtypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Basic Pivot Tables\n", + "\n", + "Creating fundamental pivot tables for data summarization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== BASIC PIVOT TABLES ===\n", + "Sales by Region and Product Category:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region \n", + "East 12377.0 12466.0 16463.0 14140.0\n", + "North 13492.0 14938.0 15229.0 17770.0\n", + "South 17590.0 16946.0 14552.0 13695.0\n", + "West 8157.0 16544.0 12681.0 15652.0\n", + "\n", + "Average Order Value by Customer Type and Sales Channel:\n", + "sales_channel Online Phone Store\n", + "customer_type \n", + "New 263.07 244.46 222.16\n", + "Returning 246.03 220.45 224.39\n", + "VIP 234.35 237.16 253.16\n", + "\n", + "Transaction Count by Region and Customer Type:\n", + "customer_type New Returning VIP\n", + "region \n", + "East 60 128 60\n", + "North 70 144 53\n", + "South 68 138 45\n", + "West 61 119 54\n" + ] + } + ], + "source": [ + "# Simple pivot table - sales by region and product category\n", + "print(\"=== BASIC PIVOT TABLES ===\")\n", + "\n", + "# Basic pivot: sum of sales by region and product category\n", + "basic_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Sales by Region and Product Category:\")\n", + "print(basic_pivot.round(0))\n", + "\n", + "# Average order value by customer type and sales channel\n", + "avg_order_pivot = df_business.pivot_table(\n", + " values='total_order',\n", + " index='customer_type',\n", + " columns='sales_channel',\n", + " aggfunc='mean',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nAverage Order Value by Customer Type and Sales Channel:\")\n", + "print(avg_order_pivot.round(2))\n", + "\n", + "# Count of transactions\n", + "transaction_count = df_business.pivot_table(\n", + " values='total_order',\n", + " index='region',\n", + " 
columns='customer_type',\n", + " aggfunc='count',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nTransaction Count by Region and Customer Type:\")\n", + "print(transaction_count)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MULTIPLE AGGREGATION FUNCTIONS ===\n", + "Multiple aggregations (sum, mean, count):\n", + " sum mean \\\n", + "product_category Books Clothing Electronics Home & Garden Books \n", + "region \n", + "East 12376.85 12465.78 16462.72 14139.75 217.14 \n", + "North 13491.69 14938.35 15228.66 17769.95 214.15 \n", + "South 17589.76 16945.54 14551.58 13695.40 266.51 \n", + "West 8156.64 16544.42 12680.58 15651.63 189.69 \n", + "\n", + " count \\\n", + "product_category Clothing Electronics Home & Garden Books Clothing \n", + "region \n", + "East 197.87 238.59 239.66 57 63 \n", + "North 226.34 220.71 257.54 63 66 \n", + "South 260.70 234.70 236.13 66 65 \n", + "West 239.77 218.63 244.56 43 69 \n", + "\n", + " \n", + "product_category Electronics Home & Garden \n", + "region \n", + "East 69 59 \n", + "North 69 69 \n", + "South 62 58 \n", + "West 58 64 \n", + "\n", + "Mixed aggregations for different metrics:\n", + " net_sales quantity total_order \\\n", + "sales_channel Online Phone Store Online Phone Store Online \n", + "region \n", + "East 34662.14 4164.23 16618.73 731 90 372 246.72 \n", + "North 36698.38 6597.29 18133.00 780 141 370 235.80 \n", + "South 38569.20 5475.73 18737.36 778 128 369 263.71 \n", + "West 38003.32 3745.70 11284.26 805 83 264 245.57 \n", + "\n", + " \n", + "sales_channel Phone Store \n", + "region \n", + "East 239.96 203.20 \n", + "North 243.26 240.63 \n", + "South 227.11 258.94 \n", + "West 204.47 213.05 \n" + ] + } + ], + "source": [ + "# Multiple aggregation functions in one pivot table\n", + "print(\"=== MULTIPLE AGGREGATION FUNCTIONS ===\")\n", + "\n", + "# Multiple aggregations for comprehensive 
analysis\n", + "multi_agg_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='product_category',\n", + " aggfunc=['sum', 'mean', 'count'],\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Multiple aggregations (sum, mean, count):\")\n", + "print(multi_agg_pivot.round(2))\n", + "\n", + "# Different values with different aggregations\n", + "mixed_agg_pivot = df_business.pivot_table(\n", + " values=['net_sales', 'quantity', 'total_order'],\n", + " index='region',\n", + " columns='sales_channel',\n", + " aggfunc={\n", + " 'net_sales': 'sum',\n", + " 'quantity': 'sum',\n", + " 'total_order': 'mean'\n", + " },\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nMixed aggregations for different metrics:\")\n", + "print(mixed_agg_pivot.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== PIVOT TABLES WITH TOTALS ===\n", + "Pivot table with row and column totals:\n", + "product_category Books Clothing Electronics Home & Garden Total\n", + "region \n", + "East 12377.0 12466.0 16463.0 14140.0 55445.0\n", + "North 13492.0 14938.0 15229.0 17770.0 61429.0\n", + "South 17590.0 16946.0 14552.0 13695.0 62782.0\n", + "West 8157.0 16544.0 12681.0 15652.0 53033.0\n", + "Total 51615.0 60894.0 58924.0 61257.0 232689.0\n", + "\n", + "Sales distribution as percentages:\n", + "product_category Books Clothing Electronics Home & Garden Total\n", + "region \n", + "East 5.32 5.36 7.07 6.08 23.83\n", + "North 5.80 6.42 6.54 7.64 26.40\n", + "South 7.56 7.28 6.25 5.89 26.98\n", + "West 3.51 7.11 5.45 6.73 22.79\n", + "Total 22.18 26.17 25.32 26.33 100.00\n" + ] + } + ], + "source": [ + "# Pivot tables with totals and margins\n", + "print(\"=== PIVOT TABLES WITH TOTALS ===\")\n", + "\n", + "# Add margins (totals) to pivot table\n", + "pivot_with_totals = df_business.pivot_table(\n", + " values='net_sales',\n", + " 
index='region',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0,\n", + " margins=True,\n", + " margins_name='Total'\n", + ")\n", + "\n", + "print(\"Pivot table with row and column totals:\")\n", + "print(pivot_with_totals.round(0))\n", + "\n", + "# Calculate percentages of total\n", + "pivot_percentages = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0,\n", + " margins=True,\n", + " margins_name='Total'\n", + ")\n", + "\n", + "# Convert every cell, including the Total margins, to a percentage of the grand total\n", + "total_sales = pivot_percentages.loc['Total', 'Total']\n", + "pivot_pct = (pivot_percentages / total_sales * 100).round(2)\n", + "\n", + "print(\"\\nSales distribution as percentages:\")\n", + "print(pivot_pct)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Advanced Pivot Table Techniques\n", + "\n", + "Complex pivot tables with multiple indices and custom aggregations."
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MULTI-LEVEL INDEX PIVOT TABLES ===\n", + "Hierarchical pivot (Region > Salesperson vs Product Category):\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region salesperson \n", + "East Alice Johnson 1537.0605 838.5400 1792.4760 1693.2745\n", + " Bob Smith 2039.4035 1156.4895 1110.9565 950.9340\n", + " Charlie Brown 435.2560 952.0800 2610.3275 1098.6095\n", + " Diana Prince 1874.3990 1845.6785 1898.2705 1610.8440\n", + " Eve Wilson 1585.4275 1534.6530 1395.2180 1179.5475\n", + " Frank Miller 1106.1950 1476.6940 945.8550 988.5600\n", + " Grace Lee 1556.9905 1298.2700 865.6170 938.2240\n", + " Henry Davis 875.1640 725.9365 2336.0330 1322.1800\n", + " Ivy Chen 797.3240 1608.6365 1110.4660 1724.6260\n", + " Jack Robinson 569.6290 1028.8020 2397.5045 2632.9480\n", + "North Alice Johnson 2314.9745 2757.4270 1911.9930 1856.1875\n", + " Bob Smith 1031.4940 489.7395 801.7940 796.1610\n", + " Charlie Brown 498.9095 1879.5460 1790.1620 1393.1530\n", + " Diana Prince 214.3950 1484.9535 1053.2905 2467.8815\n", + " Eve Wilson 2040.3085 1654.9070 1178.6505 1498.5810\n", + " Frank Miller 2509.5095 1490.6560 1264.5290 766.4540\n", + " Grace Lee 748.8710 1293.0970 833.7205 2235.9345\n", + " Henry Davis 815.0200 1089.6130 1072.8980 1280.2690\n", + " Ivy Chen 2411.9690 1079.7175 1762.1170 1244.5645\n", + " Jack Robinson 906.2425 1718.6920 3559.5100 4230.7635\n", + "\n", + "Hierarchical columns (Product Category > Customer Type):\n", + "product_category Books Clothing \\\n", + "customer_type New Returning VIP New Returning VIP \n", + "region \n", + "East 2077.0 7770.0 2530.0 2680.0 6856.0 2929.0 \n", + "North 2546.0 7153.0 3792.0 2959.0 7514.0 4465.0 \n", + "South 2831.0 10393.0 4366.0 6472.0 8685.0 1788.0 \n", + "West 3128.0 2906.0 2123.0 4974.0 8570.0 3000.0 \n", + "\n", + "product_category 
Electronics Home & Garden \n", + "customer_type New Returning VIP New Returning VIP \n", + "region \n", + "East 4562.0 7630.0 4271.0 3974.0 7367.0 2799.0 \n", + "North 6068.0 6693.0 2467.0 4970.0 10655.0 2145.0 \n", + "South 4919.0 6629.0 3004.0 2979.0 7527.0 3189.0 \n", + "West 3013.0 7416.0 2252.0 4220.0 7502.0 3929.0 \n", + "\n", + "Full hierarchical pivot (limited sample):\n", + "product_category Books Clothing \n", + "sales_channel Online Phone Store Online Phone Store\n", + "region quarter \n", + "East 1 2693.0 261.0 1483.0 1873.0 573.0 2045.0\n", + " 2 1735.0 233.0 1766.0 4208.0 644.0 529.0\n", + " 3 1358.0 0.0 1284.0 867.0 0.0 225.0\n", + " 4 1036.0 0.0 527.0 998.0 0.0 503.0\n", + "North 1 2012.0 0.0 1019.0 1987.0 948.0 1759.0\n", + " 2 1806.0 375.0 2181.0 1836.0 471.0 499.0\n", + " 3 2210.0 348.0 806.0 2702.0 305.0 311.0\n", + " 4 1649.0 0.0 1086.0 2437.0 0.0 1682.0\n" + ] + } + ], + "source": [ + "# Multi-level index pivot tables\n", + "print(\"=== MULTI-LEVEL INDEX PIVOT TABLES ===\")\n", + "\n", + "# Hierarchical rows\n", + "hierarchical_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index=['region', 'salesperson'],\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Hierarchical pivot (Region > Salesperson vs Product Category):\")\n", + "print(hierarchical_pivot.head(20))\n", + "\n", + "# Hierarchical columns\n", + "hierarchical_cols_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns=['product_category', 'customer_type'],\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nHierarchical columns (Product Category > Customer Type):\")\n", + "print(hierarchical_cols_pivot.round(0))\n", + "\n", + "# Both hierarchical rows and columns\n", + "full_hierarchical = df_business.pivot_table(\n", + " values='net_sales',\n", + " index=['region', 'quarter'],\n", + " columns=['product_category', 
'sales_channel'],\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nFull hierarchical pivot (limited sample):\")\n", + "print(full_hierarchical.iloc[:8, :6].round(0)) # Show subset for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== CUSTOM AGGREGATION FUNCTIONS ===\n", + "Custom aggregations pivot table:\n", + " mean std \\\n", + "product_category Books Clothing Electronics Home & Garden Books Clothing \n", + "region \n", + "East 217.14 197.87 238.59 239.66 133.32 176.45 \n", + "North 214.15 226.34 220.71 257.54 144.81 157.82 \n", + "South 266.51 260.70 234.70 236.13 153.97 158.86 \n", + "West 189.69 239.77 218.63 244.56 125.47 163.81 \n", + "\n", + " coefficient_of_variation \\\n", + "product_category Electronics Home & Garden Books Clothing \n", + "region \n", + "East 154.31 150.89 0.61 0.89 \n", + "North 165.37 174.64 0.68 0.70 \n", + "South 175.74 165.01 0.58 0.61 \n", + "West 143.66 167.40 0.66 0.68 \n", + "\n", + " sales_range \\\n", + "product_category Electronics Home & Garden Books Clothing Electronics \n", + "region \n", + "East 0.65 0.63 648.70 758.44 697.50 \n", + "North 0.75 0.68 531.31 663.18 752.50 \n", + "South 0.75 0.70 634.55 641.48 773.92 \n", + "West 0.66 0.68 436.05 629.05 638.34 \n", + "\n", + " \n", + "product_category Home & Garden \n", + "region \n", + "East 583.40 \n", + "North 619.36 \n", + "South 659.43 \n", + "West 656.63 \n", + "\n", + "Lambda function aggregations:\n", + " \\\n", + "sales_channel Online Phone Store Online Phone Store Online \n", + "region \n", + "East 330.66 337.74 265.63 59 9 38 109.56 \n", + "North 340.40 317.78 358.12 72 15 34 318.50 \n", + "South 375.57 293.52 359.79 66 11 35 156.12 \n", + "West 352.13 323.46 267.42 69 8 22 54.42 \n", + "\n", + " mean \n", + "sales_channel Phone Store Online Phone Store \n", + "region \n", + "East 16.17 168.84 239.05 231.35 
195.51 \n", + "North 72.98 38.41 227.94 235.62 232.47 \n", + "South 42.03 14.63 255.43 219.03 249.83 \n", + "West 53.92 53.58 237.52 197.14 205.17 \n" + ] + } + ], + "source": [ + "# Custom aggregation functions\n", + "print(\"=== CUSTOM AGGREGATION FUNCTIONS ===\")\n", + "\n", + "# Define custom aggregation functions\n", + "def coefficient_of_variation(series):\n", + " \"\"\"Calculate coefficient of variation (std/mean)\"\"\"\n", + " return series.std() / series.mean() if series.mean() != 0 else 0\n", + "\n", + "def sales_range(series):\n", + " \"\"\"Calculate range (max - min)\"\"\"\n", + " return series.max() - series.min()\n", + "\n", + "def high_value_count(series, threshold=100):\n", + " \"\"\"Count values above threshold\"\"\"\n", + " return (series > threshold).sum()\n", + "\n", + "# Apply custom aggregations\n", + "custom_agg_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='product_category',\n", + " aggfunc=[np.mean, np.std, coefficient_of_variation, sales_range],\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Custom aggregations pivot table:\")\n", + "print(custom_agg_pivot.round(2))\n", + "\n", + "# Lambda functions for inline custom aggregations\n", + "lambda_agg_pivot = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='sales_channel',\n", + " aggfunc={\n", + " 'net_sales': [\n", + " 'mean',\n", + " lambda x: x.quantile(0.75), # 75th percentile\n", + " lambda x: (x > x.mean()).sum(), # Count above average\n", + " lambda x: x.max() / x.min() if x.min() > 0 else 0 # Max/Min ratio\n", + " ]\n", + " },\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nLambda function aggregations:\")\n", + "print(lambda_agg_pivot.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== TIME-BASED PIVOT TABLES ===\n", + "Monthly sales by product 
category:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "month_name \n", + "January 4813.0 7034.0 4631.0 3365.0\n", + "February 4022.0 8492.0 3016.0 5593.0\n", + "March 6809.0 3461.0 5373.0 6898.0\n", + "April 5464.0 5000.0 5719.0 5014.0\n", + "May 5767.0 4023.0 5243.0 6586.0\n", + "June 4104.0 8360.0 7231.0 2890.0\n", + "July 5149.0 3183.0 6009.0 4328.0\n", + "August 3052.0 6048.0 6709.0 5114.0\n", + "September 4366.0 3026.0 4470.0 8162.0\n", + "October 3442.0 4719.0 2349.0 4465.0\n", + "November 2562.0 3316.0 3646.0 5111.0\n", + "December 2064.0 4231.0 4528.0 3729.0\n", + "\n", + "Day of week analysis:\n", + " net_sales quantity \n", + "sales_channel Online Phone Store Online Phone Store\n", + "day_of_week \n", + "Monday 18611.51 3866.50 10728.14 4.80 5.14 4.91\n", + "Tuesday 19606.58 3841.09 9615.84 4.90 5.75 4.68\n", + "Wednesday 22785.70 945.00 9312.78 5.14 4.60 5.00\n", + "Thursday 22420.93 5605.94 8475.84 5.35 5.75 4.89\n", + "Friday 21358.62 1274.59 11041.92 5.06 3.18 5.36\n", + "Saturday 24008.00 1851.37 6557.88 4.85 4.10 4.06\n", + "Sunday 19141.68 2598.45 9040.94 4.99 4.57 3.88\n" + ] + } + ], + "source": [ + "# Time-based pivot tables\n", + "print(\"=== TIME-BASED PIVOT TABLES ===\")\n", + "\n", + "# Monthly sales trends by product category\n", + "monthly_sales = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='month_name',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ")\n", + "\n", + "# Reorder months correctly\n", + "month_order = ['January', 'February', 'March', 'April', 'May', 'June',\n", + " 'July', 'August', 'September', 'October', 'November', 'December']\n", + "monthly_sales = monthly_sales.reindex([m for m in month_order if m in monthly_sales.index])\n", + "\n", + "print(\"Monthly sales by product category:\")\n", + "print(monthly_sales.round(0))\n", + "\n", + "# Day of week analysis\n", + "dow_analysis = df_business.pivot_table(\n", + " values=['net_sales', 
'quantity'],\n", + " index='day_of_week',\n", + " columns='sales_channel',\n", + " aggfunc={\n", + " 'net_sales': 'sum',\n", + " 'quantity': 'mean'\n", + " },\n", + " fill_value=0\n", + ")\n", + "\n", + "# Reorder days of week\n", + "day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']\n", + "dow_analysis = dow_analysis.reindex([d for d in day_order if d in dow_analysis.index])\n", + "\n", + "print(\"\\nDay of week analysis:\")\n", + "print(dow_analysis.round(2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Cross-tabulation and Contingency Tables\n", + "\n", + "Analyzing relationships between categorical variables." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== CROSS-TABULATION ===\n", + "Transaction count by Region and Customer Type:\n", + "customer_type New Returning VIP\n", + "region \n", + "East 60 128 60\n", + "North 70 144 53\n", + "South 68 138 45\n", + "West 61 119 54\n", + "\n", + "Total sales by Region and Customer Type:\n", + "customer_type New Returning VIP\n", + "region \n", + "East 13293.0 29623.0 12529.0\n", + "North 16544.0 32016.0 12869.0\n", + "South 17201.0 33234.0 12347.0\n", + "West 15336.0 26394.0 11304.0\n", + "\n", + "Transaction count by Product Category and Sales Channel (with totals):\n", + "sales_channel Online Phone Store Total\n", + "product_category \n", + "Books 140 15 74 229\n", + "Clothing 169 26 68 263\n", + "Electronics 166 21 71 258\n", + "Home & Garden 142 28 80 250\n", + "Total 617 90 293 1000\n" + ] + } + ], + "source": [ + "# Basic cross-tabulation\n", + "print(\"=== CROSS-TABULATION ===\")\n", + "\n", + "# Simple crosstab - count of transactions\n", + "basic_crosstab = pd.crosstab(df_business['region'], df_business['customer_type'])\n", + "print(\"Transaction count by Region and Customer Type:\")\n", + "print(basic_crosstab)\n", + 
"\n", + "# Crosstab with values (not just counts)\n", + "sales_crosstab = pd.crosstab(\n", + " df_business['region'],\n", + " df_business['customer_type'],\n", + " values=df_business['net_sales'],\n", + " aggfunc='sum'\n", + ")\n", + "print(\"\\nTotal sales by Region and Customer Type:\")\n", + "print(sales_crosstab.round(0))\n", + "\n", + "# Crosstab with margins (totals)\n", + "crosstab_with_margins = pd.crosstab(\n", + " df_business['product_category'],\n", + " df_business['sales_channel'],\n", + " margins=True,\n", + " margins_name='Total'\n", + ")\n", + "print(\"\\nTransaction count by Product Category and Sales Channel (with totals):\")\n", + "print(crosstab_with_margins)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== NORMALIZED CROSS-TABULATION ===\n", + "Row percentages (Customer type distribution within each region):\n", + "customer_type New Returning VIP\n", + "region \n", + "East 24.2 51.6 24.2\n", + "North 26.2 53.9 19.9\n", + "South 27.1 55.0 17.9\n", + "West 26.1 50.9 23.1\n", + "\n", + "Column percentages (Region distribution within each customer type):\n", + "customer_type New Returning VIP\n", + "region \n", + "East 23.2 24.2 28.3\n", + "North 27.0 27.2 25.0\n", + "South 26.3 26.1 21.2\n", + "West 23.6 22.5 25.5\n", + "\n", + "Total percentages (Percentage of overall total):\n", + "customer_type New Returning VIP\n", + "region \n", + "East 6.0 12.8 6.0\n", + "North 7.0 14.4 5.3\n", + "South 6.8 13.8 4.5\n", + "West 6.1 11.9 5.4\n" + ] + } + ], + "source": [ + "# Normalized cross-tabulation (percentages)\n", + "print(\"=== NORMALIZED CROSS-TABULATION ===\")\n", + "\n", + "# Normalize by rows (percentage of each row)\n", + "crosstab_row_pct = pd.crosstab(\n", + " df_business['region'],\n", + " df_business['customer_type'],\n", + " normalize='index' # Normalize by rows\n", + ") * 100\n", + "\n", + "print(\"Row percentages (Customer 
type distribution within each region):\")\n", + "print(crosstab_row_pct.round(1))\n", + "\n", + "# Normalize by columns (percentage of each column)\n", + "crosstab_col_pct = pd.crosstab(\n", + " df_business['region'],\n", + " df_business['customer_type'],\n", + " normalize='columns' # Normalize by columns\n", + ") * 100\n", + "\n", + "print(\"\\nColumn percentages (Region distribution within each customer type):\")\n", + "print(crosstab_col_pct.round(1))\n", + "\n", + "# Normalize by total (percentage of grand total)\n", + "crosstab_total_pct = pd.crosstab(\n", + " df_business['region'],\n", + " df_business['customer_type'],\n", + " normalize='all' # Normalize by total\n", + ") * 100\n", + "\n", + "print(\"\\nTotal percentages (Percentage of overall total):\")\n", + "print(crosstab_total_pct.round(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MULTI-DIMENSIONAL CROSS-TABULATION ===\n", + "Three-way analysis (Region & Product Category vs Customer Type):\n", + "customer_type New Returning VIP\n", + "region product_category \n", + "East Books 188.82 228.53 210.82\n", + " Clothing 206.15 221.17 154.18\n", + " Electronics 240.11 224.41 266.92\n", + " Home & Garden 233.78 254.03 215.29\n", + "North Books 254.64 183.41 270.87\n", + " Clothing 211.37 208.73 279.06\n", + " Electronics 233.40 230.79 176.23\n", + " Home & Garden 248.49 266.38 238.33\n", + "South Books 202.21 273.49 311.87\n", + " Clothing 340.66 234.73 198.67\n", + " Electronics 245.96 220.96 250.30\n", + " Home & Garden 198.59 228.10 318.92\n", + "West Books 223.46 193.70 151.62\n", + " Clothing 248.72 231.62 250.01\n", + " Electronics 251.07 211.88 204.71\n", + " Home & Garden 281.34 234.44 231.13\n", + "\n", + "Multiple aggregations crosstab:\n", + " count sum mean \\\n", + "sales_channel Online Phone Store Online Phone Store Online Phone \n", + "region \n", + "East 145 18 85 34662.14 
4164.23 16618.73 239.05 231.35 \n", + "North 161 28 78 36698.38 6597.29 18133.00 227.94 235.62 \n", + "South 151 25 75 38569.20 5475.73 18737.36 255.43 219.03 \n", + "West 160 19 55 38003.32 3745.70 11284.26 237.52 197.14 \n", + "\n", + " \n", + "sales_channel Store \n", + "region \n", + "East 195.51 \n", + "North 232.47 \n", + "South 249.83 \n", + "West 205.17 \n" + ] + } + ], + "source": [ + "# Multi-dimensional cross-tabulation\n", + "print(\"=== MULTI-DIMENSIONAL CROSS-TABULATION ===\")\n", + "\n", + "# Three-way crosstab\n", + "three_way_crosstab = pd.crosstab(\n", + " [df_business['region'], df_business['product_category']],\n", + " df_business['customer_type'],\n", + " values=df_business['net_sales'],\n", + " aggfunc='mean'\n", + ")\n", + "\n", + "print(\"Three-way analysis (Region & Product Category vs Customer Type):\")\n", + "print(three_way_crosstab.round(2))\n", + "\n", + "# Analysis with multiple aggregations\n", + "multi_agg_crosstab = pd.crosstab(\n", + " df_business['region'],\n", + " df_business['sales_channel'],\n", + " values=df_business['net_sales'],\n", + " aggfunc=['count', 'sum', 'mean']\n", + ")\n", + "\n", + "print(\"\\nMultiple aggregations crosstab:\")\n", + "print(multi_agg_crosstab.round(2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Data Reshaping: Melt, Pivot, Stack, Unstack\n", + "\n", + "Transforming data between wide and long formats." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MELTING DATA (WIDE TO LONG) ===\n", + "Wide format data:\n", + "product_category salesperson Books Clothing Electronics \\\n", + "0 Alice Johnson 7260.4360 7069.2950 8358.1560 \n", + "1 Bob Smith 4511.1555 4336.2970 3679.3625 \n", + "2 Charlie Brown 3677.6895 7830.9720 10264.1605 \n", + "3 Diana Prince 4861.2975 6275.8940 3348.5570 \n", + "4 Eve Wilson 6363.1345 7208.5765 5191.2705 \n", + "\n", + "product_category Home & Garden \n", + "0 5283.4600 \n", + "1 5164.6310 \n", + "2 4281.9240 \n", + "3 6108.1495 \n", + "4 6582.1870 \n", + "\n", + "Long format data (melted):\n", + " salesperson product_category total_sales\n", + "0 Alice Johnson Books 7260.4360\n", + "1 Bob Smith Books 4511.1555\n", + "2 Charlie Brown Books 3677.6895\n", + "3 Diana Prince Books 4861.2975\n", + "4 Eve Wilson Books 6363.1345\n", + "5 Frank Miller Books 5489.7555\n", + "6 Grace Lee Books 5945.8655\n", + "7 Henry Davis Books 5273.0950\n", + "8 Ivy Chen Books 4812.9940\n", + "9 Jack Robinson Books 3419.5195\n", + "\n", + "Complex wide format:\n", + "product_category region salesperson Books Clothing Electronics \\\n", + "0 East Alice Johnson 1537.0605 838.5400 1792.4760 \n", + "1 East Bob Smith 2039.4035 1156.4895 1110.9565 \n", + "2 East Charlie Brown 435.2560 952.0800 2610.3275 \n", + "3 East Diana Prince 1874.3990 1845.6785 1898.2705 \n", + "4 East Eve Wilson 1585.4275 1534.6530 1395.2180 \n", + "\n", + "product_category Home & Garden \n", + "0 1693.2745 \n", + "1 950.9340 \n", + "2 1098.6095 \n", + "3 1610.8440 \n", + "4 1179.5475 \n", + "\n", + "Complex long format:\n", + " region salesperson product_category total_sales\n", + "0 East Alice Johnson Books 1537.0605\n", + "1 East Bob Smith Books 2039.4035\n", + "2 East Charlie Brown Books 435.2560\n", + "3 East Diana Prince Books 1874.3990\n", + "4 East Eve Wilson Books 
1585.4275\n", + "5 East Frank Miller Books 1106.1950\n", + "6 East Grace Lee Books 1556.9905\n", + "7 East Henry Davis Books 875.1640\n", + "8 East Ivy Chen Books 797.3240\n", + "9 East Jack Robinson Books 569.6290\n" + ] + } + ], + "source": [ + "# Melting data from wide to long format\n", + "print(\"=== MELTING DATA (WIDE TO LONG) ===\")\n", + "\n", + "# Create a wide format dataset first\n", + "wide_sales = df_business.pivot_table(\n", + " values='net_sales',\n", + " index='salesperson',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + ").reset_index()\n", + "\n", + "print(\"Wide format data:\")\n", + "print(wide_sales.head())\n", + "\n", + "# Melt to long format\n", + "long_sales = pd.melt(\n", + " wide_sales,\n", + " id_vars=['salesperson'],\n", + " var_name='product_category',\n", + " value_name='total_sales'\n", + ")\n", + "\n", + "print(\"\\nLong format data (melted):\")\n", + "print(long_sales.head(10))\n", + "\n", + "# Melt with multiple ID variables\n", + "# First create a more complex wide dataset\n", + "complex_wide = df_business.groupby(['region', 'salesperson', 'product_category'])['net_sales'].sum().unstack(fill_value=0).reset_index()\n", + "\n", + "print(\"\\nComplex wide format:\")\n", + "print(complex_wide.head())\n", + "\n", + "# Melt with multiple ID vars\n", + "complex_long = pd.melt(\n", + " complex_wide,\n", + " id_vars=['region', 'salesperson'],\n", + " var_name='product_category',\n", + " value_name='total_sales'\n", + ")\n", + "\n", + "print(\"\\nComplex long format:\")\n", + "print(complex_long.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== PIVOT OPERATION (LONG TO WIDE) ===\n", + "Long format data sample:\n", + " region product_category customer_type net_sales\n", + "0 North Clothing Returning 72.810\n", + "1 East Clothing Returning 764.847\n", + "2 West Books VIP 27.660\n", 
+ "3 West Clothing New 252.080\n", + "4 East Clothing VIP 220.014\n", + "\n", + "Pivoted to wide format:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region \n", + "East 12377.0 12466.0 16463.0 14140.0\n", + "North 13492.0 14938.0 15229.0 17770.0\n", + "South 17590.0 16946.0 14552.0 13695.0\n", + "West 8157.0 16544.0 12681.0 15652.0\n", + "\n", + "Pivoted with reset index:\n", + "product_category region Books Clothing Electronics Home & Garden\n", + "0 East 12376.8490 12465.7800 16462.7240 14139.7475\n", + "1 North 13491.6935 14938.3485 15228.6645 17769.9495\n", + "2 South 17589.7640 16945.5435 14551.5775 13695.4025\n", + "3 West 8156.6360 16544.4195 12680.5820 15651.6335\n" + ] + } + ], + "source": [ + "# Pivot operation (long to wide)\n", + "print(\"=== PIVOT OPERATION (LONG TO WIDE) ===\")\n", + "\n", + "# Create long format data\n", + "long_data = df_business[['region', 'product_category', 'customer_type', 'net_sales']].copy()\n", + "print(\"Long format data sample:\")\n", + "print(long_data.head())\n", + "\n", + "# Simple pivot\n", + "pivoted_data = long_data.pivot_table(\n", + " values='net_sales',\n", + " index='region',\n", + " columns='product_category',\n", + " aggfunc='sum'\n", + ")\n", + "\n", + "print(\"\\nPivoted to wide format:\")\n", + "print(pivoted_data.round(0))\n", + "\n", + "# Reset index to make it a regular DataFrame\n", + "pivoted_reset = pivoted_data.reset_index()\n", + "print(\"\\nPivoted with reset index:\")\n", + "print(pivoted_reset.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== STACK AND UNSTACK OPERATIONS ===\n", + "Multi-index DataFrame:\n", + "customer_type New Returning VIP\n", + "region product_category \n", + "East Books 2077.0530 7769.9840 2529.8120\n", + " Clothing 2679.9640 6856.4155 2929.4005\n", + " Electronics 4562.0515 7629.9035 4270.7690\n", + " Home & Garden 3974.1930 
7366.8250 2798.7295\n", + "North Books 2546.4320 7153.0835 3792.1780\n", + " Clothing 2959.1455 7514.3230 4464.8800\n", + " Electronics 6068.4815 6692.9180 2467.2650\n", + " Home & Garden 4969.7755 10655.2095 2144.9645\n", + "South Books 2830.9150 10392.7060 4366.1430\n", + " Clothing 6472.4515 8685.0290 1788.0630\n", + "\n", + "After stacking (columns become rows):\n", + "region product_category customer_type\n", + "East Books New 2077.0530\n", + " Returning 7769.9840\n", + " VIP 2529.8120\n", + " Clothing New 2679.9640\n", + " Returning 6856.4155\n", + " VIP 2929.4005\n", + " Electronics New 4562.0515\n", + " Returning 7629.9035\n", + " VIP 4270.7690\n", + " Home & Garden New 3974.1930\n", + "dtype: float64\n", + "\n", + "After unstacking (back to original):\n", + "customer_type New Returning VIP\n", + "region product_category \n", + "East Books 2077.0530 7769.9840 2529.8120\n", + " Clothing 2679.9640 6856.4155 2929.4005\n", + " Electronics 4562.0515 7629.9035 4270.7690\n", + " Home & Garden 3974.1930 7366.8250 2798.7295\n", + "North Books 2546.4320 7153.0835 3792.1780\n", + " Clothing 2959.1455 7514.3230 4464.8800\n", + " Electronics 6068.4815 6692.9180 2467.2650\n", + " Home & Garden 4969.7755 10655.2095 2144.9645\n", + "South Books 2830.9150 10392.7060 4366.1430\n", + " Clothing 6472.4515 8685.0290 1788.0630\n", + "\n", + "Unstacking level 0 (region):\n", + "region East North South West\n", + "product_category customer_type \n", + "Books New 2077.0530 2546.4320 2830.9150 3128.4435\n", + " Returning 7769.9840 7153.0835 10392.7060 2905.5140\n", + " VIP 2529.8120 3792.1780 4366.1430 2122.6785\n", + "Clothing New 2679.9640 2959.1455 6472.4515 4974.4750\n", + " Returning 6856.4155 7514.3230 8685.0290 8569.8735\n" + ] + } + ], + "source": [ + "# Stack and Unstack operations\n", + "print(\"=== STACK AND UNSTACK OPERATIONS ===\")\n", + "\n", + "# Create a DataFrame with MultiIndex\n", + "multi_index_df = df_business.groupby(['region', 'product_category', 
'customer_type'])['net_sales'].sum().reset_index()\n", + "multi_pivot = multi_index_df.pivot_table(\n", + " values='net_sales',\n", + " index=['region', 'product_category'],\n", + " columns='customer_type',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Multi-index DataFrame:\")\n", + "print(multi_pivot.head(10))\n", + "\n", + "# Stack operation (columns to rows)\n", + "stacked = multi_pivot.stack()\n", + "print(\"\\nAfter stacking (columns become rows):\")\n", + "print(stacked.head(10))\n", + "\n", + "# Unstack operation (rows to columns)\n", + "unstacked = stacked.unstack()\n", + "print(\"\\nAfter unstacking (back to original):\")\n", + "print(unstacked.head(10))\n", + "\n", + "# Unstack different levels\n", + "unstacked_level0 = multi_pivot.stack().unstack(level=0)\n", + "print(\"\\nUnstacking level 0 (region):\")\n", + "print(unstacked_level0.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Advanced Reshaping Techniques\n", + "\n", + "Complex data transformations for specialized analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== MULTIPLE VALUE COLUMNS MELTING ===\n", + "Wide format with multiple metrics:\n", + " region product_category net_sales quantity total_order\n", + "0 East Books 12376.8490 277 225.021912\n", + "1 East Clothing 12465.7800 266 205.978571\n", + "2 East Electronics 16462.7240 349 245.975420\n", + "3 East Home & Garden 14139.7475 301 247.297246\n", + "4 North Books 13491.6935 296 221.647357\n", + "\n", + "Long format with multiple metrics:\n", + " region product_category metric value\n", + "0 East Books net_sales 12376.8490\n", + "1 East Clothing net_sales 12465.7800\n", + "2 East Electronics net_sales 16462.7240\n", + "3 East Home & Garden net_sales 14139.7475\n", + "4 North Books net_sales 13491.6935\n", + "5 North Clothing net_sales 14938.3485\n", + "6 North Electronics net_sales 15228.6645\n", + "7 North Home & Garden net_sales 17769.9495\n", + "8 South Books net_sales 17589.7640\n", + "9 South Clothing net_sales 16945.5435\n", + "10 South Electronics net_sales 14551.5775\n", + "11 South Home & Garden net_sales 13695.4025\n", + "12 West Books net_sales 8156.6360\n", + "13 West Clothing net_sales 16544.4195\n", + "14 West Electronics net_sales 12680.5820\n", + "\n", + "Reshaping: Region-Metric vs Product Category:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region metric \n", + "East net_sales 12376.85 12465.78 16462.72 14139.75\n", + " quantity 277.00 266.00 349.00 301.00\n", + " total_order 225.02 205.98 245.98 247.30\n", + "North net_sales 13491.69 14938.35 15228.66 17769.95\n", + " quantity 296.00 313.00 320.00 362.00\n", + " total_order 221.65 234.40 228.86 265.49\n", + "South net_sales 17589.76 16945.54 14551.58 13695.40\n", + " quantity 356.00 327.00 301.00 291.00\n", + " total_order 274.84 269.23 243.38 244.65\n", + "West net_sales 8156.64 16544.42 12680.58 15651.63\n", + 
" quantity 217.00 365.00 271.00 299.00\n", + " total_order 198.11 246.91 226.43 253.21\n" + ] + } + ], + "source": [ + "# Wide to long with multiple value columns\n", + "print(\"=== MULTIPLE VALUE COLUMNS MELTING ===\")\n", + "\n", + "# Create dataset with multiple metrics\n", + "metrics_wide = df_business.groupby(['region', 'product_category']).agg({\n", + " 'net_sales': 'sum',\n", + " 'quantity': 'sum',\n", + " 'total_order': 'mean'\n", + "}).reset_index()\n", + "\n", + "print(\"Wide format with multiple metrics:\")\n", + "print(metrics_wide.head())\n", + "\n", + "# Melt multiple value columns\n", + "metrics_long = pd.melt(\n", + " metrics_wide,\n", + " id_vars=['region', 'product_category'],\n", + " value_vars=['net_sales', 'quantity', 'total_order'],\n", + " var_name='metric',\n", + " value_name='value'\n", + ")\n", + "\n", + "print(\"\\nLong format with multiple metrics:\")\n", + "print(metrics_long.head(15))\n", + "\n", + "# Alternative: melt and then pivot for different structure\n", + "metrics_pivot = metrics_long.pivot_table(\n", + " values='value',\n", + " index=['region', 'metric'],\n", + " columns='product_category',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nReshaping: Region-Metric vs Product Category:\")\n", + "print(metrics_pivot.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== TIME SERIES PIVOT TABLES ===\n", + "Daily sales pivot (first 10 days):\n", + "product_category Books Clothing Electronics Home & Garden\n", + "date \n", + "2024-01-01 NaN 73.0 NaN NaN\n", + "2024-01-02 NaN 765.0 NaN NaN\n", + "2024-01-03 28.0 NaN NaN NaN\n", + "2024-01-04 NaN 252.0 NaN NaN\n", + "2024-01-05 NaN 220.0 NaN NaN\n", + "2024-01-06 112.0 NaN NaN NaN\n", + "2024-01-07 NaN NaN NaN 172.0\n", + "2024-01-08 NaN NaN 201.0 NaN\n", + "2024-01-09 NaN NaN NaN 153.0\n", + "2024-01-10 24.0 NaN NaN NaN\n", + "\n", + "7-day rolling average 
(sample):\n", + "product_category Books Clothing Electronics Home & Garden\n", + "date \n", + "2026-09-22 32.15 0.0 60.29 191.03\n", + "2026-09-23 32.15 0.0 60.29 184.35\n", + "2026-09-24 18.09 0.0 84.62 184.35\n", + "2026-09-25 57.20 0.0 84.62 89.29\n", + "2026-09-26 57.20 0.0 113.45 89.29\n", + "\n", + "Monthly sales:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "period \n", + "2024-01 1393.0 2789.0 983.0 1657.0\n", + "2024-02 2322.0 2198.0 877.0 2445.0\n", + "2024-03 2665.0 1029.0 1393.0 1555.0\n", + "2024-04 1722.0 1728.0 2782.0 2171.0\n", + "2024-05 2191.0 1795.0 1068.0 2092.0\n", + "... ... ... ... ...\n", + "2026-05 1651.0 1084.0 2314.0 2334.0\n", + "2026-06 2077.0 2676.0 1940.0 261.0\n", + "2026-07 1445.0 811.0 2807.0 1512.0\n", + "2026-08 104.0 2701.0 2219.0 1606.0\n", + "2026-09 1570.0 285.0 1295.0 3671.0\n", + "\n", + "[33 rows x 4 columns]\n", + "\n", + "Month-over-month growth (%):\n", + "product_category Books Clothing Electronics Home & Garden\n", + "period \n", + "2024-01 NaN NaN NaN NaN\n", + "2024-02 66.7 -21.2 -10.7 47.5\n", + "2024-03 14.8 -53.2 58.8 -36.4\n", + "2024-04 -35.4 67.9 99.7 39.6\n", + "2024-05 27.2 3.9 -61.6 -3.6\n", + "... ... ... ... 
...\n", + "2026-05 -49.0 -24.0 88.6 203.8\n", + "2026-06 25.8 146.9 -16.2 -88.8\n", + "2026-07 -30.4 -69.7 44.7 479.7\n", + "2026-08 -92.8 233.0 -21.0 6.2\n", + "2026-09 1403.9 -89.4 -41.6 128.5\n", + "\n", + "[33 rows x 4 columns]\n" + ] + } + ], + "source": [ + "# Creating time series pivot tables\n", + "print(\"=== TIME SERIES PIVOT TABLES ===\")\n", + "\n", + "# Daily sales by product category\n", + "daily_sales = df_business.groupby(['date', 'product_category'])['net_sales'].sum().reset_index()\n", + "daily_pivot = daily_sales.pivot(index='date', columns='product_category', values='net_sales')\n", + "\n", + "print(\"Daily sales pivot (first 10 days):\")\n", + "print(daily_pivot.head(10).round(0))\n", + "\n", + "# Fill missing values and calculate rolling averages\n", + "daily_pivot_filled = daily_pivot.fillna(0)\n", + "rolling_avg = daily_pivot_filled.rolling(window=7).mean()\n", + "\n", + "print(\"\\n7-day rolling average (sample):\")\n", + "print(rolling_avg.tail(5).round(2))\n", + "\n", + "# Month-over-month growth\n", + "monthly_sales = df_business.groupby(['year', 'month', 'product_category'])['net_sales'].sum().reset_index()\n", + "monthly_sales['period'] = monthly_sales['year'].astype(str) + '-' + monthly_sales['month'].astype(str).str.zfill(2)\n", + "monthly_pivot = monthly_sales.pivot(index='period', columns='product_category', values='net_sales')\n", + "\n", + "print(\"\\nMonthly sales:\")\n", + "print(monthly_pivot.round(0))\n", + "\n", + "# Calculate month-over-month growth\n", + "mom_growth = monthly_pivot.pct_change() * 100\n", + "print(\"\\nMonth-over-month growth (%):\")\n", + "print(mom_growth.round(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== COMPLEX MULTI-LEVEL RESHAPING ===\n", + "Complex hierarchical data:\n", + " region salesperson quarter product_category net_sales quantity\n", + "0 East Alice Johnson 1 Books 
710.9865 14\n", + "1 East Alice Johnson 1 Home & Garden 428.9600 8\n", + "2 East Alice Johnson 2 Books 240.5520 5\n", + "3 East Alice Johnson 2 Clothing 436.8720 7\n", + "4 East Alice Johnson 2 Electronics 750.9260 20\n", + "5 East Alice Johnson 2 Home & Garden 96.1745 2\n", + "6 East Alice Johnson 3 Books 223.0520 3\n", + "7 East Alice Johnson 3 Clothing 128.8600 2\n", + "8 East Alice Johnson 3 Electronics 567.8100 9\n", + "9 East Alice Johnson 3 Home & Garden 655.3200 12\n", + "\n", + "Salesperson performance by quarter and category:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region salesperson quarter \n", + "East Alice Johnson 1 710.9865 0.0000 0.0000 428.9600\n", + " 2 240.5520 436.8720 750.9260 96.1745\n", + " 3 223.0520 128.8600 567.8100 655.3200\n", + " 4 362.4700 272.8080 473.7400 512.8200\n", + " Bob Smith 1 775.7800 360.9700 0.0000 698.4970\n", + " 2 729.5005 561.8205 305.0745 10.1970\n", + " 3 534.1230 233.6990 0.0000 0.0000\n", + " 4 0.0000 0.0000 805.8820 242.2400\n", + " Charlie Brown 1 0.0000 675.4200 672.7680 220.9600\n", + " 2 141.1830 97.4520 1063.2360 689.1900\n", + " 3 233.1210 0.0000 455.7235 31.8835\n", + " 4 60.9520 179.2080 418.6000 156.5760\n", + " Diana Prince 1 294.5120 1280.4690 542.1370 58.3920\n", + " 2 1364.7370 244.2495 183.4500 1018.4200\n", + " 3 215.1500 320.9600 610.4835 266.0820\n", + "\n", + "Quarterly comparison view (sample):\n", + "product_category Books Clothing \\\n", + "quarter 1 2 3 4 1 2 3 \n", + "region salesperson \n", + "East Alice Johnson 711.0 241.0 223.0 362.0 0.0 437.0 129.0 \n", + " Bob Smith 776.0 730.0 534.0 0.0 361.0 562.0 234.0 \n", + " Charlie Brown 0.0 141.0 233.0 61.0 675.0 97.0 0.0 \n", + " Diana Prince 295.0 1365.0 215.0 0.0 1280.0 244.0 321.0 \n", + " Eve Wilson 0.0 501.0 281.0 803.0 505.0 942.0 88.0 \n", + "\n", + "product_category \n", + "quarter 4 \n", + "region salesperson \n", + "East Alice Johnson 273.0 \n", + " Bob Smith 0.0 \n", + " Charlie Brown 179.0 \n", + " 
Diana Prince 0.0 \n", + " Eve Wilson 0.0 \n", + "\n", + "Region summary:\n", + "product_category Books Clothing Electronics Home & Garden\n", + "region \n", + "East 12377.0 12466.0 16463.0 14140.0\n", + "North 13492.0 14938.0 15229.0 17770.0\n", + "South 17590.0 16946.0 14552.0 13695.0\n", + "West 8157.0 16544.0 12681.0 15652.0\n" + ] + } + ], + "source": [ + "# Complex multi-level reshaping\n", + "print(\"=== COMPLEX MULTI-LEVEL RESHAPING ===\")\n", + "\n", + "# Create complex hierarchical data\n", + "complex_data = df_business.groupby(['region', 'salesperson', 'quarter', 'product_category']).agg({\n", + " 'net_sales': 'sum',\n", + " 'quantity': 'sum'\n", + "}).reset_index()\n", + "\n", + "print(\"Complex hierarchical data:\")\n", + "print(complex_data.head(10))\n", + "\n", + "# Multiple pivot operations\n", + "# First pivot: Quarter vs Product Category for each salesperson\n", + "salesperson_pivot = complex_data.pivot_table(\n", + " values='net_sales',\n", + " index=['region', 'salesperson', 'quarter'],\n", + " columns='product_category',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nSalesperson performance by quarter and category:\")\n", + "print(salesperson_pivot.head(15))\n", + "\n", + "# Stack and unstack for different views\n", + "# Unstack quarter to see quarterly comparison\n", + "quarterly_comparison = salesperson_pivot.unstack(level=2)\n", + "\n", + "print(\"\\nQuarterly comparison view (sample):\")\n", + "print(quarterly_comparison.iloc[:5, :8].round(0)) # Show subset\n", + "\n", + "# Create summary by region\n", + "region_summary = salesperson_pivot.groupby('region').sum()\n", + "print(\"\\nRegion summary:\")\n", + "print(region_summary.round(0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Business Intelligence Applications\n", + "\n", + "Real-world business analysis using pivot tables and reshaping." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== SALES PERFORMANCE DASHBOARD ===\n", + "1. Regional Performance Summary:\n", + " net_sales quantity total_order\n", + " mean sum sum count\n", + "region \n", + "East 223.57 55445.10 1193 248\n", + "North 230.07 61428.66 1291 267\n", + "South 250.13 62782.29 1275 251\n", + "West 226.64 53033.27 1152 234\n", + "\n", + "2. Category Performance by Quarter:\n", + "quarter 1 2 3 4 All\n", + "product_category \n", + "Books 15644.0 15334.0 12567.0 8069.0 51615.0\n", + "Clothing 18988.0 17384.0 12257.0 12266.0 60894.0\n", + "Electronics 13021.0 18192.0 17188.0 10523.0 58924.0\n", + "Home & Garden 15857.0 14490.0 17604.0 13305.0 61257.0\n", + "All 63509.0 65400.0 59617.0 44163.0 232689.0\n", + "\n", + "3. Sales Channel Analysis:\n", + " sum mean \n", + "customer_type New Returning VIP New Returning VIP\n", + "sales_channel \n", + "Online 39805.26 76878.78 31248.99 255.16 238.01 226.44\n", + "Phone 5438.57 10409.81 4134.56 236.46 212.45 229.70\n", + "Store 17130.46 33977.55 13665.33 214.13 216.42 244.02\n", + "\n", + "4. Top 5 Performers:\n", + " net_sales quantity total_order\n", + "salesperson \n", + "Alice Johnson 27971.35 550 118\n", + "Jack Robinson 27955.98 564 107\n", + "Charlie Brown 26054.75 529 110\n", + "Eve Wilson 25345.17 553 107\n", + "Frank Miller 24301.05 491 96\n" + ] + } + ], + "source": [ + "# Sales performance dashboard\n", + "print(\"=== SALES PERFORMANCE DASHBOARD ===\")\n", + "\n", + "def create_sales_dashboard(df):\n", + " \"\"\"Create comprehensive sales dashboard using pivot tables\"\"\"\n", + " dashboard = {}\n", + " \n", + " # 1. 
Regional performance summary\n", + " dashboard['regional_summary'] = df.pivot_table(\n", + " values=['net_sales', 'quantity', 'total_order'],\n", + " index='region',\n", + " aggfunc={\n", + " 'net_sales': ['sum', 'mean'],\n", + " 'quantity': 'sum',\n", + " 'total_order': 'count'\n", + " }\n", + " ).round(2)\n", + " \n", + " # 2. Product category performance\n", + " dashboard['category_performance'] = df.pivot_table(\n", + " values='net_sales',\n", + " index='product_category',\n", + " columns='quarter',\n", + " aggfunc='sum',\n", + " margins=True,\n", + " fill_value=0\n", + " ).round(0)\n", + " \n", + " # 3. Sales channel analysis\n", + " dashboard['channel_analysis'] = df.pivot_table(\n", + " values='net_sales',\n", + " index='sales_channel',\n", + " columns='customer_type',\n", + " aggfunc=['sum', 'mean'],\n", + " fill_value=0\n", + " ).round(2)\n", + " \n", + " # 4. Top performers\n", + " salesperson_performance = df.groupby('salesperson').agg({\n", + " 'net_sales': 'sum',\n", + " 'quantity': 'sum',\n", + " 'total_order': 'count'\n", + " }).round(2)\n", + " dashboard['top_performers'] = salesperson_performance.sort_values('net_sales', ascending=False).head(5)\n", + " \n", + " # 5. Monthly trends\n", + " dashboard['monthly_trends'] = df.pivot_table(\n", + " values='net_sales',\n", + " index='month_name',\n", + " columns='product_category',\n", + " aggfunc='sum',\n", + " fill_value=0\n", + " ).round(0)\n", + " \n", + " return dashboard\n", + "\n", + "# Generate dashboard\n", + "sales_dashboard = create_sales_dashboard(df_business)\n", + "\n", + "print(\"1. Regional Performance Summary:\")\n", + "print(sales_dashboard['regional_summary'])\n", + "\n", + "print(\"\\n2. Category Performance by Quarter:\")\n", + "print(sales_dashboard['category_performance'])\n", + "\n", + "print(\"\\n3. Sales Channel Analysis:\")\n", + "print(sales_dashboard['channel_analysis'])\n", + "\n", + "print(\"\\n4. 
Top 5 Performers:\")\n", + "print(sales_dashboard['top_performers'])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== CUSTOMER SEGMENTATION ANALYSIS ===\n", + "Customer behavior by type and channel:\n", + " discount_amount net_sales \\\n", + "sales_channel Online Phone Store Online Phone Store \n", + "customer_type \n", + "New 11.73 15.24 10.61 255.16 236.46 214.13 \n", + "Returning 11.45 9.43 11.81 238.01 212.45 216.42 \n", + "VIP 11.36 15.36 15.59 226.44 229.70 244.02 \n", + "\n", + " quantity \n", + "sales_channel Online Phone Store \n", + "customer_type \n", + "New 5.29 4.78 4.47 \n", + "Returning 4.88 5.14 4.62 \n", + "VIP 5.03 4.44 5.21 \n", + "\n", + "Purchase patterns (% of quarterly sales):\n", + "quarter 1 2 3 4\n", + "customer_type product_category \n", + "New Books 6.2 4.4 3.1 4.3\n", + " Clothing 6.9 7.2 5.7 10.5\n", + " Electronics 4.3 9.5 9.7 8.6\n", + " Home & Garden 8.8 3.7 4.5 12.4\n", + "Returning Books 13.2 13.8 9.8 11.3\n", + " Clothing 16.2 12.8 12.3 12.7\n", + " Electronics 11.2 13.8 11.9 11.6\n", + " Home & Garden 10.2 15.3 17.7 13.7\n", + "VIP Books 5.2 5.3 8.1 2.7\n", + " Clothing 6.8 6.5 2.6 4.6\n", + " Electronics 4.9 4.5 7.2 3.6\n", + " Home & Garden 6.1 3.2 7.3 4.1\n", + "\n", + "Customer value distribution:\n", + " count mean std min 25% 50% 75% max\n", + "customer_type \n", + "New 259.0 248.78 155.20 18.31 128.74 217.77 345.09 768.32\n", + "Returning 529.0 237.24 160.52 9.76 109.41 201.91 342.29 798.01\n", + "VIP 212.0 239.56 158.12 25.92 115.17 198.15 357.37 673.39\n" + ] + } + ], + "source": [ + "# Customer segmentation analysis\n", + "print(\"=== CUSTOMER SEGMENTATION ANALYSIS ===\")\n", + "\n", + "# Customer behavior analysis\n", + "customer_behavior = df_business.pivot_table(\n", + " values=['net_sales', 'quantity', 'discount_amount'],\n", + " index='customer_type',\n", + " columns='sales_channel',\n", + " 
aggfunc={\n", + " 'net_sales': 'mean',\n", + " 'quantity': 'mean',\n", + " 'discount_amount': 'mean'\n", + " },\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Customer behavior by type and channel:\")\n", + "print(customer_behavior.round(2))\n", + "\n", + "# Purchase patterns analysis\n", + "purchase_patterns = pd.crosstab(\n", + " [df_business['customer_type'], df_business['product_category']],\n", + " df_business['quarter'],\n", + " values=df_business['net_sales'],\n", + " aggfunc='sum',\n", + " normalize='columns'\n", + ") * 100\n", + "\n", + "print(\"\\nPurchase patterns (% of quarterly sales):\")\n", + "print(purchase_patterns.round(1))\n", + "\n", + "# Customer value distribution\n", + "value_distribution = df_business.groupby('customer_type')['total_order'].describe()\n", + "print(\"\\nCustomer value distribution:\")\n", + "print(value_distribution.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== COHORT ANALYSIS ===\n", + "Cohort analysis (average sales by quarters since first purchase):\n", + "periods_since_first 0 1 2 3\n", + "first_quarter \n", + "1 234.0 240.0 219.0 240.0\n", + "\n", + "Cohort retention (number of active salespeople):\n", + "periods_since_first 0 1 2 3\n", + "first_quarter \n", + "1 271 273 272 184\n", + "\n", + "Retention rates (%):\n", + "periods_since_first 0 1 2 3\n", + "first_quarter \n", + "1 100.0 100.7 100.4 67.9\n" + ] + } + ], + "source": [ + "# Cohort analysis using pivot tables\n", + "print(\"=== COHORT ANALYSIS ===\")\n", + "\n", + "# Simplified cohort analysis by quarter\n", + "# Group salespeople by the first quarter in which they recorded a sale\n", + "customer_first_purchase = df_business.groupby('salesperson')['quarter'].min().reset_index()\n", + "customer_first_purchase.columns = ['salesperson', 'first_quarter']\n", + "\n", + "# Merge back to get cohort information\n", + "cohort_data = 
df_business.merge(customer_first_purchase, on='salesperson')\n", + "cohort_data['periods_since_first'] = cohort_data['quarter'] - cohort_data['first_quarter']\n", + "\n", + "# Create cohort table\n", + "cohort_table = cohort_data.pivot_table(\n", + " values='net_sales',\n", + " index='first_quarter',\n", + " columns='periods_since_first',\n", + " aggfunc='mean',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"Cohort analysis (average sales by quarters since first purchase):\")\n", + "print(cohort_table.round(0))\n", + "\n", + "# Retention analysis (aggfunc='count' tallies transaction rows, not distinct salespeople)\n", + "cohort_counts = cohort_data.pivot_table(\n", + " values='salesperson',\n", + " index='first_quarter',\n", + " columns='periods_since_first',\n", + " aggfunc='count',\n", + " fill_value=0\n", + ")\n", + "\n", + "print(\"\\nCohort retention (number of active salespeople):\")\n", + "print(cohort_counts)\n", + "\n", + "# Calculate retention rates relative to period 0 (row-based, so values can exceed 100%)\n", + "cohort_retention = cohort_counts.divide(cohort_counts.iloc[:, 0], axis=0) * 100\n", + "print(\"\\nRetention rates (%):\")\n", + "print(cohort_retention.round(1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply pivot tables and reshaping to complex business scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Multi-dimensional Business Intelligence Report\n", + "# Create a comprehensive BI report that includes:\n", + "# - Sales performance across multiple dimensions\n", + "# - Trend analysis with period-over-period comparisons\n", + "# - Customer segmentation insights\n", + "# - Product performance matrix\n", + "# - Actionable recommendations based on pivot table insights\n", + "\n", + "def create_comprehensive_bi_report(df):\n", + " \"\"\"Create multi-dimensional business intelligence report\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# bi_report = 
create_comprehensive_bi_report(df_business)\n", + "# print(\"Comprehensive BI Report:\")\n", + "# for section, data in bi_report.items():\n", + "# print(f\"\\n{section}:\")\n", + "# print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Dynamic Pivot Table Generator\n", + "# Create a flexible function that can generate pivot tables with:\n", + "# - User-specified dimensions (rows, columns, values)\n", + "# - Multiple aggregation functions\n", + "# - Automatic handling of different data types\n", + "# - Export capabilities for different formats\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Advanced Reshaping Challenge\n", + "# Transform the data through multiple reshaping operations:\n", + "# - Convert to time series format\n", + "# - Create rolling calculations\n", + "# - Build comparison matrices\n", + "# - Generate variance analysis reports\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Pivot Tables**:\n", + " - **`pivot_table()`**: Most flexible, handles duplicates with aggregation\n", + " - **`pivot()`**: Simple reshaping, requires unique index-column combinations\n", + " - **`crosstab()`**: Specialized for frequency tables and cross-tabulation\n", + "\n", + "2. **Reshaping Operations**:\n", + " - **`melt()`**: Wide to long format (unpivot)\n", + " - **`pivot()`**: Long to wide format\n", + " - **`stack()`**: Column to row index\n", + " - **`unstack()`**: Row index to column\n", + "\n", + "3. **Best Practices**:\n", + " - Use `fill_value=0` to handle missing combinations\n", + " - Add `margins=True` for totals when needed\n", + " - Choose appropriate aggregation functions for your data\n", + " - Consider data types when reshaping\n", + "\n", + "4. 
**Business Applications**:\n", + " - Sales performance analysis\n", + " - Customer segmentation\n", + " - Trend analysis and forecasting\n", + " - Cohort and retention analysis\n", + "\n", + "## Pivot Table Quick Reference\n", + "\n", + "```python\n", + "# Basic pivot table\n", + "df.pivot_table(values='sales', index='region', columns='product', aggfunc='sum')\n", + "\n", + "# Multiple aggregations\n", + "df.pivot_table(values='sales', index='region', aggfunc=['sum', 'mean', 'count'])\n", + "\n", + "# With margins (totals)\n", + "df.pivot_table(values='sales', index='region', columns='product', \n", + " aggfunc='sum', margins=True)\n", + "\n", + "# Cross-tabulation\n", + "pd.crosstab(df['region'], df['product'], normalize='index')\n", + "\n", + "# Reshaping\n", + "pd.melt(df, id_vars=['id'], value_vars=['col1', 'col2']) # Wide to long\n", + "df.pivot(index='date', columns='category', values='value') # Long to wide\n", + "```\n", + "\n", + "## Common Use Cases\n", + "\n", + "| Scenario | Best Tool | Key Parameters |\n", + "|----------|-----------|----------------|\n", + "| Sales by region/product | `pivot_table()` | `values='sales', index='region', columns='product'` |\n", + "| Frequency analysis | `crosstab()` | `normalize='index'` for percentages |\n", + "| Time series analysis | `pivot()` + `unstack()` | Handle dates in index |\n", + "| Data normalization | `melt()` | `id_vars` for identifiers |\n", + "| Multi-level analysis | Hierarchical indexing | Multiple columns in `index`/`columns` |" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/Session_01/PandasDataFrame-exmples/10_time_series_analysis.ipynb b/Session_01/PandasDataFrame-exmples/10_time_series_analysis.ipynb new file mode 100755 index 0000000..47236a6 --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/10_time_series_analysis.ipynb @@ -0,0 +1,1149 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 10: Time Series Analysis\n", + "\n", + "## Learning Objectives\n", + "- Master datetime indexing and time-based operations\n", + "- Learn resampling and frequency conversion techniques\n", + "- Understand rolling calculations and window functions\n", + "- Practice with seasonal analysis and trend decomposition\n", + "- Apply time series techniques to business forecasting scenarios\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-9\n", + "- Understanding of datetime concepts\n", + "- Basic knowledge of statistics (helpful for trend analysis)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from datetime import datetime, timedelta\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display options\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 20)\n", + "plt.style.use('seaborn-v0_8')\n", + "%matplotlib inline\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Time Series Data\n", + "\n", + "Let's create realistic time series datasets for analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create comprehensive time series dataset\n", + "np.random.seed(42)\n", + "\n", + "# Generate 2 years of daily data\n", + "start_date = '2022-01-01'\n", + "end_date = '2023-12-31'\n", + "date_range = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Create realistic sales data with trends and seasonality\n", + "n_days = len(date_range)\n", + "base_sales = 1000\n", + "\n", + "# Add trend (gradual increase over time)\n", + "trend = np.linspace(0, 300, n_days)\n", + "\n", + "# Add seasonality (weekly and monthly patterns)\n", + "daily_pattern = np.sin(2 * np.pi * np.arange(n_days) / 7) * 100 # Weekly pattern\n", + "monthly_pattern = np.sin(2 * np.pi * np.arange(n_days) / 30.44) * 150 # Monthly pattern\n", + "yearly_pattern = np.sin(2 * np.pi * np.arange(n_days) / 365.25) * 200 # Yearly pattern\n", + "\n", + "# Add random noise\n", + "noise = np.random.normal(0, 80, n_days)\n", + "\n", + "# Combine all components\n", + "sales = base_sales + trend + daily_pattern + monthly_pattern + yearly_pattern + noise\n", + "sales = np.maximum(sales, 0) # Ensure non-negative sales\n", + "\n", + "# Create DataFrame\n", + "ts_data = pd.DataFrame({\n", + " 'date': date_range,\n", + " 'sales': sales,\n", + " 'customers': np.random.poisson(50, n_days) + (sales / 50).astype(int),\n", + " 'marketing_spend': np.random.gamma(2, 20, n_days),\n", + " 'temperature': 20 + 10 * np.sin(2 * np.pi * np.arange(n_days) / 365.25) + np.random.normal(0, 5, n_days),\n", + " 'is_weekend': pd.Series(date_range).dt.dayofweek >= 5,\n", + " 'is_holiday': np.random.choice([True, False], n_days, p=[0.05, 0.95])\n", + "})\n", + "\n", + "# Set date as index\n", + "ts_data.set_index('date', inplace=True)\n", + "\n", + "print(\"Time series dataset created:\")\n", + "print(f\"Shape: {ts_data.shape}\")\n", + "print(f\"Date range: {ts_data.index.min()} to 
{ts_data.index.max()}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(ts_data.head())\n", + "print(\"\\nData types:\")\n", + "print(ts_data.dtypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. DateTime Indexing and Basic Operations\n", + "\n", + "Working with datetime indices and time-based selection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic datetime index operations\n", + "print(\"=== DATETIME INDEX OPERATIONS ===\")\n", + "\n", + "# Index information\n", + "print(f\"Index type: {type(ts_data.index)}\")\n", + "print(f\"Index frequency: {ts_data.index.freq}\")\n", + "print(f\"Index is monotonic increasing: {ts_data.index.is_monotonic_increasing}\")\n", + "print(f\"Index is monotonic decreasing: {ts_data.index.is_monotonic_decreasing}\")\n", + "print(f\"Index has duplicates: {ts_data.index.has_duplicates}\")\n", + "\n", + "# Time-based selection\n", + "print(\"\\n--- Time-based Selection ---\")\n", + "\n", + "# Select specific year\n", + "sales_2022 = ts_data.loc['2022']\n", + "print(f\"2022 data shape: {sales_2022.shape}\")\n", + "print(f\"2022 average daily sales: {sales_2022['sales'].mean():.2f}\")\n", + "\n", + "# Select specific month\n", + "jan_2023 = ts_data.loc['2023-01']\n", + "print(f\"\\nJanuary 2023 data shape: {jan_2023.shape}\")\n", + "print(f\"January 2023 total sales: {jan_2023['sales'].sum():.2f}\")\n", + "\n", + "# Select date range\n", + "q1_2023 = ts_data.loc['2023-01-01':'2023-03-31']\n", + "print(f\"\\nQ1 2023 data shape: {q1_2023.shape}\")\n", + "print(f\"Q1 2023 average sales: {q1_2023['sales'].mean():.2f}\")\n", + "\n", + "# Recent data (last 30 days)\n", + "recent_data = ts_data.tail(30)\n", + "print(f\"\\nLast 30 days average sales: {recent_data['sales'].mean():.2f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# DateTime component 
extraction\n", + "print(\"=== DATETIME COMPONENT EXTRACTION ===\")\n", + "\n", + "# Extract various date components\n", + "ts_enhanced = ts_data.copy()\n", + "ts_enhanced['year'] = ts_enhanced.index.year\n", + "ts_enhanced['month'] = ts_enhanced.index.month\n", + "ts_enhanced['quarter'] = ts_enhanced.index.quarter\n", + "ts_enhanced['day_of_week'] = ts_enhanced.index.dayofweek # 0=Monday, 6=Sunday\n", + "ts_enhanced['day_name'] = ts_enhanced.index.day_name()\n", + "ts_enhanced['month_name'] = ts_enhanced.index.month_name()\n", + "ts_enhanced['week_of_year'] = ts_enhanced.index.isocalendar().week\n", + "ts_enhanced['day_of_year'] = ts_enhanced.index.dayofyear\n", + "ts_enhanced['is_month_start'] = ts_enhanced.index.is_month_start\n", + "ts_enhanced['is_month_end'] = ts_enhanced.index.is_month_end\n", + "ts_enhanced['is_quarter_start'] = ts_enhanced.index.is_quarter_start\n", + "ts_enhanced['is_quarter_end'] = ts_enhanced.index.is_quarter_end\n", + "\n", + "print(\"Enhanced dataset with datetime components:\")\n", + "print(ts_enhanced[['sales', 'year', 'month', 'quarter', 'day_name', 'week_of_year']].head())\n", + "\n", + "# Analyze patterns by day of week\n", + "print(\"\\nSales patterns by day of week:\")\n", + "dow_analysis = ts_enhanced.groupby('day_name')['sales'].agg(['mean', 'std', 'count'])\n", + "# Reorder by weekday\n", + "day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']\n", + "dow_analysis = dow_analysis.reindex(day_order)\n", + "print(dow_analysis.round(2))\n", + "\n", + "# Monthly patterns\n", + "print(\"\\nSales patterns by month:\")\n", + "monthly_analysis = ts_enhanced.groupby('month_name')['sales'].agg(['mean', 'std'])\n", + "month_order = ['January', 'February', 'March', 'April', 'May', 'June',\n", + " 'July', 'August', 'September', 'October', 'November', 'December']\n", + "monthly_analysis = monthly_analysis.reindex([m for m in month_order if m in monthly_analysis.index])\n", + 
"print(monthly_analysis.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Time series visualization\n", + "print(\"=== TIME SERIES VISUALIZATION ===\")\n", + "\n", + "# Create comprehensive time series plots\n", + "fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n", + "\n", + "# Plot 1: Daily sales over time\n", + "ts_data['sales'].plot(ax=axes[0, 0], title='Daily Sales Over Time', alpha=0.7)\n", + "axes[0, 0].set_ylabel('Sales ($)')\n", + "axes[0, 0].grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: Monthly aggregated sales\n", + "monthly_sales = ts_data['sales'].resample('M').sum()\n", + "monthly_sales.plot(ax=axes[0, 1], title='Monthly Sales', marker='o')\n", + "axes[0, 1].set_ylabel('Monthly Sales ($)')\n", + "axes[0, 1].grid(True, alpha=0.3)\n", + "\n", + "# Plot 3: Sales vs customers correlation\n", + "axes[1, 0].scatter(ts_data['customers'], ts_data['sales'], alpha=0.5)\n", + "axes[1, 0].set_title('Sales vs Customers')\n", + "axes[1, 0].set_xlabel('Number of Customers')\n", + "axes[1, 0].set_ylabel('Sales ($)')\n", + "axes[1, 0].grid(True, alpha=0.3)\n", + "\n", + "# Plot 4: Seasonal pattern (by month)\n", + "ts_enhanced.groupby('month')['sales'].mean().plot(ax=axes[1, 1], kind='bar', \n", + " title='Average Sales by Month')\n", + "axes[1, 1].set_ylabel('Average Sales ($)')\n", + "axes[1, 1].set_xlabel('Month')\n", + "axes[1, 1].tick_params(axis='x', rotation=45)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Summary statistics\n", + "print(\"\\nTime series summary statistics:\")\n", + "print(ts_data['sales'].describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Resampling and Frequency Conversion\n", + "\n", + "Converting between different time frequencies and aggregating data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic resampling operations\n", + "print(\"=== BASIC RESAMPLING ===\")\n", + "\n", + "# Resample to different frequencies\n", + "weekly_data = ts_data.resample('W').agg({\n", + " 'sales': 'sum',\n", + " 'customers': 'sum',\n", + " 'marketing_spend': 'sum',\n", + " 'temperature': 'mean'\n", + "})\n", + "\n", + "print(\"Weekly resampled data:\")\n", + "print(weekly_data.head(10))\n", + "\n", + "# Monthly resampling with multiple aggregations\n", + "monthly_data = ts_data.resample('M').agg({\n", + " 'sales': ['sum', 'mean', 'std', 'min', 'max'],\n", + " 'customers': ['sum', 'mean'],\n", + " 'marketing_spend': 'sum',\n", + " 'temperature': 'mean'\n", + "})\n", + "\n", + "print(\"\\nMonthly resampled data (first 6 months):\")\n", + "print(monthly_data.head(6))\n", + "\n", + "# Quarterly resampling\n", + "quarterly_data = ts_data.resample('Q').agg({\n", + " 'sales': 'sum',\n", + " 'customers': 'sum',\n", + " 'marketing_spend': 'sum'\n", + "})\n", + "\n", + "print(\"\\nQuarterly resampled data:\")\n", + "print(quarterly_data)\n", + "\n", + "# Year-over-year comparison\n", + "yearly_data = ts_data.resample('Y').agg({\n", + " 'sales': 'sum',\n", + " 'customers': 'sum',\n", + " 'marketing_spend': 'sum'\n", + "})\n", + "\n", + "print(\"\\nYearly resampled data:\")\n", + "print(yearly_data)\n", + "\n", + "# Calculate year-over-year growth\n", + "if len(yearly_data) > 1:\n", + " yoy_growth = yearly_data.pct_change() * 100\n", + " print(\"\\nYear-over-year growth (%):\")\n", + " print(yoy_growth.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced resampling techniques\n", + "print(\"=== ADVANCED RESAMPLING ===\")\n", + "\n", + "# Custom aggregation functions\n", + "def coefficient_of_variation(series):\n", + " \"\"\"Calculate coefficient of variation\"\"\"\n", + " return 
series.std() / series.mean() if series.mean() != 0 else 0\n", + "\n", + "def sales_volatility(series):\n", + " \"\"\"Calculate sales volatility (standard deviation)\"\"\"\n", + " return series.std()\n", + "\n", + "# Custom resampling with multiple functions\n", + "custom_monthly = ts_data.resample('M').agg({\n", + " 'sales': ['sum', 'mean', coefficient_of_variation, sales_volatility],\n", + " 'customers': ['sum', 'mean'],\n", + " 'marketing_spend': 'sum'\n", + "})\n", + "\n", + "print(\"Custom monthly aggregations:\")\n", + "print(custom_monthly.round(3))\n", + "\n", + "# Resampling with different anchor points\n", + "# Weekly data starting on different days\n", + "weekly_sunday = ts_data.resample('W-SUN')['sales'].sum() # Week ending Sunday\n", + "weekly_monday = ts_data.resample('W-MON')['sales'].sum() # Week ending Monday\n", + "\n", + "print(\"\\nWeekly totals comparison (first 10 weeks):\")\n", + "weekly_comparison = pd.DataFrame({\n", + " 'Week_End_Sunday': weekly_sunday,\n", + " 'Week_End_Monday': weekly_monday\n", + "})\n", + "print(weekly_comparison.head(10))\n", + "\n", + "# Business day resampling\n", + "business_weekly = ts_data.resample('B').mean() # Business days only\n", + "print(f\"\\nBusiness days data shape: {business_weekly.shape}\")\n", + "print(\"Business days average (first 10):\")\n", + "print(business_weekly[['sales', 'customers']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Upsampling and downsampling\n", + "print(\"=== UPSAMPLING AND DOWNSAMPLING ===\")\n", + "\n", + "# Downsample to weekly and visualize\n", + "weekly_sales = ts_data['sales'].resample('W').sum()\n", + "\n", + "# Upsample weekly back to daily (forward fill)\n", + "upsampled_ffill = weekly_sales.resample('D').ffill()\n", + "\n", + "# Upsample with interpolation\n", + "upsampled_interp = weekly_sales.resample('D').interpolate()\n", + "\n", + "print(\"Upsampling comparison (sample period):\")\n", + "\n", 
+ "# Fix: Use the date range directly, not the filtered DataFrame\n", + "start_date = '2023-01-01'\n", + "end_date = '2023-01-31'\n", + "\n", + "upsample_comparison = pd.DataFrame({\n", + " 'Original_Daily': ts_data.loc[start_date:end_date, 'sales'],\n", + " 'Weekly_Upsampled_FFill': upsampled_ffill.loc[start_date:end_date],\n", + " 'Weekly_Upsampled_Interp': upsampled_interp.loc[start_date:end_date]\n", + "})\n", + "\n", + "print(upsample_comparison.head(15))\n", + "\n", + "# Visualize upsampling methods\n", + "plt.figure(figsize=(12, 6))\n", + "plt.plot(upsample_comparison.index, upsample_comparison['Original_Daily'], \n", + " label='Original Daily', alpha=0.7)\n", + "plt.plot(upsample_comparison.index, upsample_comparison['Weekly_Upsampled_FFill'], \n", + " label='Forward Fill', linestyle='--')\n", + "plt.plot(upsample_comparison.index, upsample_comparison['Weekly_Upsampled_Interp'], \n", + " label='Interpolated', linestyle='-.')\n", + "plt.title('Upsampling Methods Comparison')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Rolling Calculations and Window Functions\n", + "\n", + "Moving averages, rolling statistics, and window-based analysis." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic rolling calculations\n", + "print(\"=== BASIC ROLLING CALCULATIONS ===\")\n", + "\n", + "# Calculate various rolling statistics\n", + "rolling_data = ts_data.copy()\n", + "\n", + "# Rolling means (moving averages)\n", + "rolling_data['sales_7d_avg'] = rolling_data['sales'].rolling(window=7).mean()\n", + "rolling_data['sales_30d_avg'] = rolling_data['sales'].rolling(window=30).mean()\n", + "rolling_data['sales_90d_avg'] = rolling_data['sales'].rolling(window=90).mean()\n", + "\n", + "# Rolling standard deviation (volatility)\n", + "rolling_data['sales_7d_std'] = rolling_data['sales'].rolling(window=7).std()\n", + "rolling_data['sales_30d_std'] = rolling_data['sales'].rolling(window=30).std()\n", + "\n", + "# Rolling min/max\n", + "rolling_data['sales_30d_min'] = rolling_data['sales'].rolling(window=30).min()\n", + "rolling_data['sales_30d_max'] = rolling_data['sales'].rolling(window=30).max()\n", + "\n", + "print(\"Rolling statistics (last 10 days):\")\n", + "rolling_cols = ['sales', 'sales_7d_avg', 'sales_30d_avg', 'sales_7d_std', 'sales_30d_std']\n", + "print(rolling_data[rolling_cols].tail(10).round(2))\n", + "\n", + "# Rolling sum for cumulative analysis\n", + "rolling_data['sales_7d_sum'] = rolling_data['sales'].rolling(window=7).sum()\n", + "rolling_data['sales_30d_sum'] = rolling_data['sales'].rolling(window=30).sum()\n", + "\n", + "print(\"\\nRolling sums (last 5 days):\")\n", + "print(rolling_data[['sales', 'sales_7d_sum', 'sales_30d_sum']].tail(5).round(0))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced rolling calculations\n", + "print(\"=== ADVANCED ROLLING CALCULATIONS ===\")\n", + "\n", + "# Rolling correlation between variables\n", + "rolling_data['sales_customers_corr_30d'] = rolling_data['sales'].rolling(window=30).corr(rolling_data['customers'])\n", 
+ "rolling_data['sales_marketing_corr_30d'] = rolling_data['sales'].rolling(window=30).corr(rolling_data['marketing_spend'])\n", + "\n", + "print(\"Rolling correlations (last 10 days):\")\n", + "corr_cols = ['sales_customers_corr_30d', 'sales_marketing_corr_30d']\n", + "print(rolling_data[corr_cols].tail(10).round(3))\n", + "\n", + "# Rolling quantiles\n", + "rolling_data['sales_30d_q25'] = rolling_data['sales'].rolling(window=30).quantile(0.25)\n", + "rolling_data['sales_30d_q75'] = rolling_data['sales'].rolling(window=30).quantile(0.75)\n", + "rolling_data['sales_30d_median'] = rolling_data['sales'].rolling(window=30).median()\n", + "\n", + "print(\"\\nRolling quantiles (last 5 days):\")\n", + "quantile_cols = ['sales', 'sales_30d_q25', 'sales_30d_median', 'sales_30d_q75']\n", + "print(rolling_data[quantile_cols].tail(5).round(2))\n", + "\n", + "# Custom rolling functions\n", + "def rolling_cv(series):\n", + " \"\"\"Rolling coefficient of variation\"\"\"\n", + " return series.std() / series.mean() if series.mean() != 0 else 0\n", + "\n", + "def rolling_skewness(series):\n", + " \"\"\"Rolling skewness\"\"\"\n", + " return series.skew()\n", + "\n", + "rolling_data['sales_30d_cv'] = rolling_data['sales'].rolling(window=30).apply(rolling_cv)\n", + "rolling_data['sales_30d_skew'] = rolling_data['sales'].rolling(window=30).apply(rolling_skewness)\n", + "\n", + "print(\"\\nCustom rolling statistics (last 5 days):\")\n", + "custom_cols = ['sales_30d_cv', 'sales_30d_skew']\n", + "print(rolling_data[custom_cols].tail(5).round(3))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Exponentially weighted functions\n", + "print(\"=== EXPONENTIALLY WEIGHTED FUNCTIONS ===\")\n", + "\n", + "# Exponentially weighted moving average (EWMA)\n", + "rolling_data['sales_ewm_10'] = rolling_data['sales'].ewm(span=10).mean()\n", + "rolling_data['sales_ewm_30'] = rolling_data['sales'].ewm(span=30).mean()\n", + "\n", + "# 
Exponentially weighted standard deviation\n", + "rolling_data['sales_ewm_std_10'] = rolling_data['sales'].ewm(span=10).std()\n", + "\n", + "print(\"Exponentially weighted statistics (last 10 days):\")\n", + "ewm_cols = ['sales', 'sales_7d_avg', 'sales_ewm_10', 'sales_ewm_30']\n", + "print(rolling_data[ewm_cols].tail(10).round(2))\n", + "\n", + "# Visualize different smoothing methods\n", + "plt.figure(figsize=(15, 8))\n", + "\n", + "# Plot last 90 days for clarity\n", + "recent_period = rolling_data.tail(90)\n", + "\n", + "plt.plot(recent_period.index, recent_period['sales'], label='Original Sales', alpha=0.7)\n", + "plt.plot(recent_period.index, recent_period['sales_7d_avg'], label='7-day MA', linewidth=2)\n", + "plt.plot(recent_period.index, recent_period['sales_30d_avg'], label='30-day MA', linewidth=2)\n", + "plt.plot(recent_period.index, recent_period['sales_ewm_10'], label='EWM (span=10)', linewidth=2)\n", + "\n", + "plt.title('Sales Smoothing Methods Comparison (Last 90 Days)')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Compare how closely each smoothed series tracks the original (correlation)\n", + "print(\"\\nSmoothing method responsiveness (correlation with original):\")\n", + "responsiveness = {\n", + " '7-day MA': rolling_data['sales'].corr(rolling_data['sales_7d_avg']),\n", + " '30-day MA': rolling_data['sales'].corr(rolling_data['sales_30d_avg']),\n", + " 'EWM (span=10)': rolling_data['sales'].corr(rolling_data['sales_ewm_10']),\n", + " 'EWM (span=30)': rolling_data['sales'].corr(rolling_data['sales_ewm_30'])\n", + "}\n", + "\n", + "for method, corr in responsiveness.items():\n", + " print(f\"{method}: {corr:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Seasonal Analysis and Decomposition\n", + "\n", + "Analyzing seasonal patterns and decomposing time series." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Seasonal pattern analysis\n", + "print(\"=== SEASONAL PATTERN ANALYSIS ===\")\n", + "\n", + "# Add more detailed time components\n", + "seasonal_data = ts_data.copy()\n", + "seasonal_data['month'] = seasonal_data.index.month\n", + "seasonal_data['quarter'] = seasonal_data.index.quarter\n", + "seasonal_data['day_of_week'] = seasonal_data.index.dayofweek\n", + "seasonal_data['week_of_year'] = seasonal_data.index.isocalendar().week\n", + "seasonal_data['day_of_year'] = seasonal_data.index.dayofyear\n", + "\n", + "# Monthly seasonality\n", + "monthly_pattern = seasonal_data.groupby('month')['sales'].agg(['mean', 'std', 'count'])\n", + "print(\"Monthly sales patterns:\")\n", + "print(monthly_pattern.round(2))\n", + "\n", + "# Day of week patterns\n", + "dow_pattern = seasonal_data.groupby('day_of_week')['sales'].agg(['mean', 'std'])\n", + "dow_pattern.index = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\n", + "print(\"\\nDay of week patterns:\")\n", + "print(dow_pattern.round(2))\n", + "\n", + "# Weekly patterns throughout the year\n", + "weekly_pattern = seasonal_data.groupby('week_of_year')['sales'].mean()\n", + "print(\"\\nWeekly pattern statistics:\")\n", + "print(f\"Highest week: Week {weekly_pattern.idxmax()} (${weekly_pattern.max():.0f})\")\n", + "print(f\"Lowest week: Week {weekly_pattern.idxmin()} (${weekly_pattern.min():.0f})\")\n", + "print(f\"Weekly variation: {weekly_pattern.std():.2f}\")\n", + "\n", + "# Quarterly analysis\n", + "quarterly_pattern = seasonal_data.groupby('quarter')['sales'].agg(['mean', 'sum', 'std'])\n", + "print(\"\\nQuarterly patterns:\")\n", + "print(quarterly_pattern.round(2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple seasonal decomposition\n", + "print(\"=== SEASONAL DECOMPOSITION ===\")\n", + "\n", + "# Manual decomposition 
approach\n", + "def simple_decompose(series, period=365):\n", + " \"\"\"Simple seasonal decomposition\"\"\"\n", + " # Trend (using centered moving average)\n", + " trend = series.rolling(window=period, center=True).mean()\n", + " \n", + " # Detrended series\n", + " detrended = series - trend\n", + " \n", + " # Seasonal component (average for each period)\n", + " seasonal_avg = detrended.groupby(detrended.index.dayofyear).mean()\n", + " seasonal = pd.Series(index=series.index, dtype=float)\n", + " for idx in series.index:\n", + " day_of_year = idx.dayofyear\n", + " if day_of_year in seasonal_avg.index:\n", + " seasonal.loc[idx] = seasonal_avg.loc[day_of_year]\n", + " else: # Handle leap year day\n", + " seasonal.loc[idx] = 0\n", + " \n", + " # Residual (what's left after removing trend and seasonality)\n", + " residual = series - trend - seasonal\n", + " \n", + " return trend, seasonal, residual\n", + "\n", + "# Decompose sales data\n", + "trend, seasonal, residual = simple_decompose(ts_data['sales'])\n", + "\n", + "# Create decomposition DataFrame\n", + "decomposition = pd.DataFrame({\n", + " 'original': ts_data['sales'],\n", + " 'trend': trend,\n", + " 'seasonal': seasonal,\n", + " 'residual': residual\n", + "})\n", + "\n", + "print(\"Decomposition summary:\")\n", + "print(decomposition.describe().round(2))\n", + "\n", + "# Visualize decomposition\n", + "fig, axes = plt.subplots(4, 1, figsize=(15, 12))\n", + "\n", + "# Original series\n", + "decomposition['original'].plot(ax=axes[0], title='Original Sales Data')\n", + "axes[0].set_ylabel('Sales ($)')\n", + "axes[0].grid(True, alpha=0.3)\n", + "\n", + "# Trend\n", + "decomposition['trend'].plot(ax=axes[1], title='Trend Component', color='red')\n", + "axes[1].set_ylabel('Trend ($)')\n", + "axes[1].grid(True, alpha=0.3)\n", + "\n", + "# Seasonal\n", + "decomposition['seasonal'].plot(ax=axes[2], title='Seasonal Component', color='green')\n", + "axes[2].set_ylabel('Seasonal ($)')\n", + "axes[2].grid(True, 
alpha=0.3)\n", + "\n", + "# Residual\n", + "decomposition['residual'].plot(ax=axes[3], title='Residual Component', color='purple')\n", + "axes[3].set_ylabel('Residual ($)')\n", + "axes[3].set_xlabel('Date')\n", + "axes[3].grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"\\nDecomposition insights:\")\n", + "print(f\"Trend contribution: {trend.std():.2f} (std dev)\")\n", + "print(f\"Seasonal contribution: {seasonal.std():.2f} (std dev)\")\n", + "print(f\"Residual contribution: {residual.std():.2f} (std dev)\")\n", + "print(f\"Total variation: {ts_data['sales'].std():.2f} (std dev)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced seasonal analysis\n", + "print(\"=== ADVANCED SEASONAL ANALYSIS ===\")\n", + "\n", + "# Year-over-year comparison\n", + "yoy_comparison = pd.DataFrame()\n", + "for year in ts_data.index.year.unique():\n", + " year_data = ts_data[ts_data.index.year == year]['sales']\n", + " year_data.index = year_data.index.dayofyear\n", + " yoy_comparison[f'Year_{year}'] = year_data\n", + "\n", + "print(\"Year-over-year comparison (sample days):\")\n", + "print(yoy_comparison.head(10).round(2))\n", + "\n", + "# Calculate year-over-year changes\n", + "if len(yoy_comparison.columns) > 1:\n", + " yoy_change = yoy_comparison.pct_change(axis=1) * 100\n", + " print(\"\\nYear-over-year change statistics:\")\n", + " for col in yoy_change.columns[1:]:\n", + " print(f\"{col}: mean={yoy_change[col].mean():.2f}%, std={yoy_change[col].std():.2f}%\")\n", + "\n", + "# Seasonal strength measurement\n", + "def seasonal_strength(series, period=365):\n", + " \"\"\"Calculate seasonal strength (0 = no seasonality, 1 = pure seasonality)\"\"\"\n", + " # Detrend the series\n", + " trend = series.rolling(window=period, center=True).mean()\n", + " detrended = series - trend\n", + " \n", + " # Calculate seasonal component\n", + " seasonal_avg = 
detrended.groupby(detrended.index.dayofyear).mean()\n", + " seasonal_var = seasonal_avg.var()\n", + " \n", + " # Calculate residual variance\n", + " seasonal_full = pd.Series(index=series.index, dtype=float)\n", + " for idx in series.index:\n", + " day_of_year = idx.dayofyear\n", + " if day_of_year in seasonal_avg.index:\n", + " seasonal_full.loc[idx] = seasonal_avg.loc[day_of_year]\n", + " else:\n", + " seasonal_full.loc[idx] = 0\n", + " \n", + " residual = detrended - seasonal_full\n", + " residual_var = residual.var()\n", + " \n", + " # Seasonal strength\n", + " return seasonal_var / (seasonal_var + residual_var)\n", + "\n", + "sales_seasonal_strength = seasonal_strength(ts_data['sales'])\n", + "print(f\"\\nSales seasonal strength: {sales_seasonal_strength:.3f}\")\n", + "print(\"(0 = no seasonality, 1 = pure seasonality)\")\n", + "\n", + "# Identify most/least seasonal periods\n", + "monthly_seasonal = seasonal_data.groupby('month')['sales'].std()\n", + "print(f\"\\nMost variable month: {monthly_seasonal.idxmax()} (std: {monthly_seasonal.max():.2f})\")\n", + "print(f\"Least variable month: {monthly_seasonal.idxmin()} (std: {monthly_seasonal.min():.2f})\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Business Applications and Forecasting\n", + "\n", + "Real-world time series analysis for business insights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Business performance metrics\n", + "print(\"=== BUSINESS PERFORMANCE METRICS ===\")\n", + "\n", + "def calculate_business_metrics(df):\n", + " \"\"\"Calculate key business time series metrics\"\"\"\n", + " metrics = {}\n", + " \n", + " # Growth metrics\n", + " daily_sales = df['sales']\n", + " metrics['total_sales'] = daily_sales.sum()\n", + " metrics['avg_daily_sales'] = daily_sales.mean()\n", + " metrics['sales_growth_rate'] = (daily_sales.iloc[-30:].mean() / daily_sales.iloc[:30].mean() - 1) * 100\n", + " \n", + " # Volatility metrics\n", + " metrics['sales_volatility'] = daily_sales.std()\n", + " metrics['coefficient_of_variation'] = daily_sales.std() / daily_sales.mean()\n", + " \n", + " # Trend metrics\n", + " trend = daily_sales.rolling(window=30).mean()\n", + " metrics['trend_direction'] = 'Increasing' if trend.iloc[-1] > trend.iloc[-30] else 'Decreasing'\n", + " metrics['trend_strength'] = abs(trend.iloc[-1] - trend.iloc[-30]) / trend.iloc[-30] * 100\n", + " \n", + " # Customer metrics\n", + " metrics['avg_customers_per_day'] = df['customers'].mean()\n", + " metrics['sales_per_customer'] = df['sales'].sum() / df['customers'].sum()\n", + " \n", + " # Marketing efficiency\n", + " metrics['marketing_roi'] = df['sales'].sum() / df['marketing_spend'].sum()\n", + " \n", + " return metrics\n", + "\n", + "# Calculate metrics for different periods\n", + "overall_metrics = calculate_business_metrics(ts_data)\n", + "recent_metrics = calculate_business_metrics(ts_data.tail(90)) # Last 90 days\n", + "\n", + "print(\"Overall Performance Metrics:\")\n", + "for metric, value in overall_metrics.items():\n", + " if isinstance(value, float):\n", + " print(f\"{metric}: {value:.2f}\")\n", + " else:\n", + " print(f\"{metric}: {value}\")\n", + "\n", + "print(\"\\nRecent 90-day Performance Metrics:\")\n", + "for metric, value in recent_metrics.items():\n", + " if 
isinstance(value, float):\n", + " print(f\"{metric}: {value:.2f}\")\n", + " else:\n", + " print(f\"{metric}: {value}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Simple forecasting using historical patterns\n", + "print(\"=== SIMPLE FORECASTING ===\")\n", + "\n", + "def simple_forecast(series, periods=30, method='seasonal_naive'):\n", + " \"\"\"Simple forecasting methods\"\"\"\n", + " if method == 'naive':\n", + " # Naive: repeat last value\n", + " return pd.Series([series.iloc[-1]] * periods, \n", + " index=pd.date_range(series.index[-1] + pd.Timedelta(days=1), periods=periods))\n", + " \n", + " elif method == 'seasonal_naive':\n", + " # Seasonal naive: repeat same day from previous year\n", + " forecast_dates = pd.date_range(series.index[-1] + pd.Timedelta(days=1), periods=periods)\n", + " forecast_values = []\n", + " \n", + " for date in forecast_dates:\n", + " # Find same day of year from previous year\n", + " previous_year_date = date - pd.DateOffset(years=1)\n", + " if previous_year_date in series.index:\n", + " forecast_values.append(series.loc[previous_year_date])\n", + " else:\n", + " # Fallback to seasonal average\n", + " day_of_year = date.dayofyear\n", + " same_day_values = series[series.index.dayofyear == day_of_year]\n", + " if len(same_day_values) > 0:\n", + " forecast_values.append(same_day_values.mean())\n", + " else:\n", + " forecast_values.append(series.mean())\n", + " \n", + " return pd.Series(forecast_values, index=forecast_dates)\n", + " \n", + " elif method == 'moving_average':\n", + " # Moving average forecast\n", + " ma_value = series.tail(30).mean()\n", + " return pd.Series([ma_value] * periods,\n", + " index=pd.date_range(series.index[-1] + pd.Timedelta(days=1), periods=periods))\n", + " \n", + " elif method == 'trend':\n", + " # Linear trend forecast\n", + " from scipy import stats\n", + " x = np.arange(len(series))\n", + " slope, intercept, _, _, _ = 
stats.linregress(x, series.values)\n", + " \n", + " forecast_dates = pd.date_range(series.index[-1] + pd.Timedelta(days=1), periods=periods)\n", + " forecast_values = [slope * (len(series) + i) + intercept for i in range(1, periods + 1)]\n", + " \n", + " return pd.Series(forecast_values, index=forecast_dates)\n", + "\n", + "# Generate forecasts using different methods\n", + "forecast_periods = 30\n", + "sales_series = ts_data['sales']\n", + "\n", + "forecasts = {\n", + " 'Naive': simple_forecast(sales_series, forecast_periods, 'naive'),\n", + " 'Seasonal_Naive': simple_forecast(sales_series, forecast_periods, 'seasonal_naive'),\n", + " 'Moving_Average': simple_forecast(sales_series, forecast_periods, 'moving_average'),\n", + " 'Trend': simple_forecast(sales_series, forecast_periods, 'trend')\n", + "}\n", + "\n", + "print(f\"Forecasts for next {forecast_periods} days:\")\n", + "forecast_df = pd.DataFrame(forecasts)\n", + "print(forecast_df.head(10).round(2))\n", + "\n", + "print(\"\\nForecast summary statistics:\")\n", + "print(forecast_df.describe().round(2))\n", + "\n", + "# Visualize forecasts\n", + "plt.figure(figsize=(15, 8))\n", + "\n", + "# Plot historical data (last 90 days)\n", + "historical_period = sales_series.tail(90)\n", + "plt.plot(historical_period.index, historical_period.values, label='Historical Data', color='black', linewidth=2)\n", + "\n", + "# Plot forecasts\n", + "colors = ['red', 'blue', 'green', 'orange']\n", + "for i, (method, forecast) in enumerate(forecasts.items()):\n", + " plt.plot(forecast.index, forecast.values, label=f'{method} Forecast', \n", + " color=colors[i], linestyle='--', linewidth=2)\n", + "\n", + "plt.title('Sales Forecasting Comparison')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Anomaly detection 
in time series\n", + "print(\"=== ANOMALY DETECTION ===\")\n", + "\n", + "def detect_anomalies(series, method='zscore', threshold=3):\n", + " \"\"\"Detect anomalies in time series\"\"\"\n", + " anomalies = pd.Series(False, index=series.index)\n", + " \n", + " if method == 'zscore':\n", + " # Z-score method\n", + " z_scores = np.abs((series - series.mean()) / series.std())\n", + " anomalies = z_scores > threshold\n", + " \n", + " elif method == 'iqr':\n", + " # Interquartile range method\n", + " Q1 = series.quantile(0.25)\n", + " Q3 = series.quantile(0.75)\n", + " IQR = Q3 - Q1\n", + " lower_bound = Q1 - 1.5 * IQR\n", + " upper_bound = Q3 + 1.5 * IQR\n", + " anomalies = (series < lower_bound) | (series > upper_bound)\n", + " \n", + " elif method == 'rolling':\n", + " # Rolling window method\n", + " rolling_mean = series.rolling(window=30).mean()\n", + " rolling_std = series.rolling(window=30).std()\n", + " z_scores = np.abs((series - rolling_mean) / rolling_std)\n", + " anomalies = z_scores > threshold\n", + " \n", + " return anomalies\n", + "\n", + "# Detect anomalies using different methods\n", + "anomaly_methods = ['zscore', 'iqr', 'rolling']\n", + "anomaly_results = {}\n", + "\n", + "for method in anomaly_methods:\n", + " anomalies = detect_anomalies(ts_data['sales'], method=method)\n", + " anomaly_results[method] = anomalies\n", + " print(f\"{method.upper()} method: {anomalies.sum()} anomalies detected\")\n", + "\n", + "# Combine anomaly detection results\n", + "anomaly_df = pd.DataFrame(anomaly_results)\n", + "anomaly_df['any_method'] = anomaly_df.any(axis=1)\n", + "anomaly_df['all_methods'] = anomaly_df[anomaly_methods].all(axis=1)\n", + "\n", + "print(f\"\\nAnomalies detected by any method: {anomaly_df['any_method'].sum()}\")\n", + "print(f\"Anomalies detected by all methods: {anomaly_df['all_methods'].sum()}\")\n", + "\n", + "# Show anomalous dates\n", + "severe_anomalies = ts_data[anomaly_df['all_methods']]\n", + "if len(severe_anomalies) > 0:\n", + " 
print(\"\\nSevere anomalies (detected by all methods):\")\n", + " print(severe_anomalies[['sales', 'customers', 'marketing_spend']].round(2))\n", + "\n", + "# Visualize anomalies\n", + "plt.figure(figsize=(15, 8))\n", + "\n", + "# Plot sales data\n", + "plt.plot(ts_data.index, ts_data['sales'], label='Sales Data', alpha=0.7)\n", + "\n", + "# Highlight anomalies\n", + "for method in anomaly_methods:\n", + " anomaly_dates = ts_data.index[anomaly_results[method]]\n", + " anomaly_values = ts_data.loc[anomaly_dates, 'sales']\n", + " plt.scatter(anomaly_dates, anomaly_values, label=f'{method.upper()} Anomalies', alpha=0.7, s=30)\n", + "\n", + "plt.title('Sales Data with Anomaly Detection')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Anomaly statistics\n", + "print(\"\\nAnomaly statistics:\")\n", + "for method in anomaly_methods:\n", + " anomaly_sales = ts_data.loc[anomaly_results[method], 'sales']\n", + " if len(anomaly_sales) > 0:\n", + " print(f\"{method.upper()}: mean=${anomaly_sales.mean():.2f}, std=${anomaly_sales.std():.2f}\")\n", + " else:\n", + " print(f\"{method.upper()}: No anomalies detected\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply time series analysis to complex business scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Comprehensive Time Series Dashboard\n", + "# Create a complete time series analysis dashboard that includes:\n", + "# - Multiple time series metrics and KPIs\n", + "# - Seasonal analysis and trend identification\n", + "# - Anomaly detection and alerting\n", + "# - Forecasting with confidence intervals\n", + "# - Business insights and recommendations\n", + "\n", + "def create_time_series_dashboard(df):\n", + " \"\"\"Create comprehensive time series 
analysis dashboard\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# dashboard = create_time_series_dashboard(ts_data)\n", + "# print(\"Time Series Dashboard Created\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Multi-variate Time Series Analysis\n", + "# Analyze relationships between multiple time series:\n", + "# - Cross-correlation analysis\n", + "# - Lead-lag relationships\n", + "# - Causality testing\n", + "# - Multi-variate forecasting\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Advanced Forecasting Challenge\n", + "# Implement more sophisticated forecasting methods:\n", + "# - Exponential smoothing with trend and seasonality\n", + "# - ARIMA modeling\n", + "# - Model evaluation and selection\n", + "# - Forecast accuracy metrics\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **DateTime Indexing**:\n", + " - Use `pd.DatetimeIndex` for time-based operations\n", + " - Enable powerful time-based selection and slicing\n", + " - Extract components (year, month, day, etc.) for analysis\n", + "\n", + "2. **Resampling**:\n", + " - **`.resample()`**: Convert between different frequencies\n", + " - **Downsampling**: Aggregate to lower frequency (daily → monthly)\n", + " - **Upsampling**: Convert to higher frequency (monthly → daily)\n", + " - Use appropriate aggregation functions for your data\n", + "\n", + "3. **Rolling Calculations**:\n", + " - **`.rolling()`**: Moving window calculations\n", + " - **`.ewm()`**: Exponentially weighted functions\n", + " - Useful for smoothing and trend analysis\n", + " - Handle missing values appropriately\n", + "\n", + "4. 
**Seasonal Analysis**:\n", + " - Identify patterns by time components\n", + " - Decompose into trend, seasonal, and residual\n", + " - Measure seasonal strength and variability\n", + "\n", + "## Time Series Quick Reference\n", + "\n", + "```python\n", + "# Create datetime index\n", + "df.set_index(pd.to_datetime(df['date']), inplace=True)\n", + "\n", + "# Time-based selection (use .loc: df['2023'] raises KeyError in pandas >= 2.0)\n", + "df.loc['2023'] # Select year\n", + "df.loc['2023-01'] # Select month\n", + "df.loc['2023-01-01':'2023-01-31'] # Date range\n", + "\n", + "# Resampling\n", + "df.resample('M').sum() # Monthly sum (use 'ME' in pandas >= 2.2)\n", + "df.resample('W').mean() # Weekly average\n", + "df.resample('Q').agg({'col': ['sum', 'mean']}) # Quarterly multi-agg (use 'QE' in pandas >= 2.2)\n", + "\n", + "# Rolling calculations\n", + "df['col'].rolling(7).mean() # 7-period moving average\n", + "df['col'].ewm(span=10).mean() # Exponential moving average\n", + "df['col'].rolling(30).std() # 30-period rolling standard deviation\n", + "```\n", + "\n", + "## Business Applications\n", + "\n", + "| Use Case | Technique | Key Insights |\n", + "|----------|-----------|-------------|\n", + "| Sales forecasting | Seasonal decomposition + trends | Predict future performance |\n", + "| Anomaly detection | Rolling statistics + thresholds | Identify unusual patterns |\n", + "| Performance monitoring | Moving averages + KPIs | Track business health |\n", + "| Seasonal planning | Seasonal analysis | Optimize inventory/staffing |\n", + "| Marketing ROI | Cross-correlation analysis | Measure campaign effectiveness |\n", + "\n", + "## Best Practices\n", + "\n", + "1. **Data Quality**: Ensure consistent time intervals and handle missing data\n", + "2. **Frequency Choice**: Choose appropriate resampling frequency for your analysis\n", + "3. **Window Size**: Balance responsiveness vs. smoothness in rolling calculations\n", + "4. **Seasonality**: Always check for and account for seasonal patterns\n", + "5. **Validation**: Use holdout periods to validate forecasting models\n", + "6. 
**Business Context**: Interpret results in context of business cycles and events\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/11_string_operation.ipynb b/Session_01/PandasDataFrame-exmples/11_string_operation.ipynb new file mode 100755 index 0000000..33b3770 --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/11_string_operation.ipynb @@ -0,0 +1,1059 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 11: String Operations and Text Processing\n", + "\n", + "## Learning Objectives\n", + "- Master pandas string methods for text processing\n", + "- Learn regular expressions for pattern matching and extraction\n", + "- Understand text cleaning and standardization techniques\n", + "- Practice with real-world text data scenarios\n", + "- Apply string operations to business data analysis\n", + "\n", + "## Prerequisites\n", + "- Completed Lessons 1-10\n", + "- Basic understanding of regular expressions (helpful but not required)\n", + "- Familiarity with text data challenges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "import string\n", + "from datetime import datetime\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set display options\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 20)\n", + "pd.set_option('display.max_colwidth', 
50)\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Text-Rich Dataset\n", + "\n", + "Let's create a comprehensive dataset with various text processing challenges." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create realistic text-rich dataset\n", + "np.random.seed(42)\n", + "\n", + "# Sample data with intentional text issues\n", + "text_data = {\n", + " 'customer_id': range(1, 201),\n", + " 'customer_name': [\n", + " 'John Smith', 'jane doe', 'MARY JOHNSON', 'Bob Wilson Jr.', 'Dr. Sarah Davis',\n", + " 'Mike O\\'Connor', 'Lisa Garcia-Martinez', 'David Miller III', 'Amy Chen', 'Tom Anderson',\n", + " 'Kate Wilson', 'james brown', 'DIANA PRINCE', 'Frank Miller Sr.', 'Prof. Grace Lee',\n", + " 'Henry Davis', 'Ivy Chen-Wang', 'Jack Robinson', 'Olivia Taylor', 'Ryan Clark'\n", + " ] * 10,\n", + " 'email': [\n", + " 'john.smith@email.com', 'JANE.DOE@EMAIL.COM', 'mary@company.org',\n", + " 'bob.wilson@test.co.uk', 'sarah.davis@university.edu', 'mike@work.net',\n", + " 'lisa.garcia@startup.io', 'david@consulting.biz', 'amy.chen@tech.com', 'tom@sales.org',\n", + " 'kate.wilson@design.com', 'james@marketing.net', 'diana@fashion.com',\n", + " 'frank@legal.org', 'grace.lee@research.edu', 'henry@finance.com',\n", + " 'ivy@engineering.tech', 'jack@operations.biz', 'olivia@hr.org', 'ryan@analytics.io'\n", + " ] * 10,\n", + " 'phone': [\n", + " '(555) 123-4567', '555.987.6543', '5551234567', '+1-555-987-6543',\n", + " '(555)123-4567', '555 123 4567', '1-555-987-6543', '555-123-4567',\n", + " '(555) 987 6543', '+15559876543', '555.123.4567', '(555)987-6543',\n", + " '555 987 6543', '1 555 123 4567', '+1 555 987 6543', '5559876543',\n", + " '(555)-123-4567', '555_987_6543', '555/123/4567', '555-987-6543'\n", + " ] * 10,\n", + " 'address': [\n", + " '123 Main St, Anytown, NY 12345', '456 Oak Ave, Boston, MA 
02101',\n", + " '789 pine road, los angeles, CA 90210', '321 ELM STREET, Chicago, IL 60601',\n", + " '654 Maple Dr., Houston, TX 77001', '987 Cedar Lane, Phoenix, AZ 85001',\n", + " '147 birch way, Philadelphia, PA 19101', '258 ASH CT, San Antonio, TX 78201',\n", + " '369 Walnut St., San Diego, CA 92101', '741 Cherry Ave, Dallas, TX 75201',\n", + " '852 Spruce Blvd, Austin, TX 73301', '963 Fir Street, Seattle, WA 98101',\n", + " '159 redwood dr, Portland, OR 97201', '357 WILLOW LN, Denver, CO 80201',\n", + " '468 Poplar St., Miami, FL 33101', '579 Hickory Ave, Atlanta, GA 30301',\n", + " '680 magnolia way, Nashville, TN 37201', '791 DOGWOOD CT, Charlotte, NC 28201',\n", + " '802 Palm St., Orlando, FL 32801', '913 Cypress Ave, Tampa, FL 33601'\n", + " ] * 10,\n", + " 'product_reviews': [\n", + " 'Great product! Highly recommend!!!', 'okay product, nothing special',\n", + " 'TERRIBLE! DO NOT BUY!', 'Amazing quality, fast shipping :)', 'Good value for money.',\n", + " 'Poor quality, broke after 1 week :(', 'Excellent customer service!',\n", + " 'average product... could be better', 'LOVE IT! 5 stars!!', 'Not worth the price.',\n", + " 'Perfect! Exactly what I needed.', 'disappointing quality',\n", + " 'OUTSTANDING PRODUCT!!!', 'mediocre at best', 'Fantastic! Will buy again.',\n", + " 'cheap quality, looks fake', 'Superb craftsmanship!',\n", + " 'waste of money', 'Incredible value! 
Recommended!', 'poor design'\n", + " ] * 10,\n", + " 'job_title': [\n", + " 'Software Engineer', 'data scientist', 'MARKETING MANAGER', 'Sales Rep',\n", + " 'Product Manager', 'business analyst', 'UX DESIGNER', 'DevOps Engineer',\n", + " 'Content Writer', 'project manager', 'FINANCIAL ANALYST', 'HR Specialist',\n", + " 'Operations Manager', 'qa engineer', 'RESEARCH SCIENTIST', 'Account Executive',\n", + " 'Digital Marketer', 'software developer', 'DATA ENGINEER', 'Consultant'\n", + " ] * 10,\n", + " 'company': [\n", + " 'TechCorp Inc.', 'data solutions llc', 'INNOVATIVE SYSTEMS', 'Global Enterprises',\n", + " 'StartupXYZ', 'consulting group ltd', 'FUTURE TECH CO', 'Analytics Pro',\n", + " 'Design Studio', 'enterprise solutions', 'MARKETING MASTERS', 'Software Solutions',\n", + " 'Digital Agency', 'research institute', 'FINANCE FIRM', 'Operations Co.',\n", + " 'Creative Agency', 'tech startup', 'DATA CORP', 'Professional Services'\n", + " ] * 10\n", + "}\n", + "\n", + "df_text = pd.DataFrame(text_data)\n", + "\n", + "print(\"Text-rich dataset created:\")\n", + "print(f\"Shape: {df_text.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_text.head())\n", + "print(\"\\nData types:\")\n", + "print(df_text.dtypes)\n", + "print(\"\\nSample of text issues to address:\")\n", + "print(\"- Inconsistent capitalization\")\n", + "print(\"- Various phone number formats\")\n", + "print(\"- Mixed address formatting\")\n", + "print(\"- Inconsistent email domains\")\n", + "print(\"- Varied punctuation and emoticons in reviews\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Basic String Operations\n", + "\n", + "Fundamental string methods and transformations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic string transformations\n", + "print(\"=== BASIC STRING TRANSFORMATIONS ===\")\n", + "\n", + "# Case transformations\n", + "df_basic = df_text.copy()\n", + "\n", + "# Convert to different cases\n", + "df_basic['customer_name_upper'] = df_basic['customer_name'].str.upper()\n", + "df_basic['customer_name_lower'] = df_basic['customer_name'].str.lower()\n", + "df_basic['customer_name_title'] = df_basic['customer_name'].str.title()\n", + "df_basic['customer_name_capitalize'] = df_basic['customer_name'].str.capitalize()\n", + "\n", + "print(\"Case transformations:\")\n", + "case_cols = ['customer_name', 'customer_name_upper', 'customer_name_lower', \n", + " 'customer_name_title', 'customer_name_capitalize']\n", + "print(df_basic[case_cols].head())\n", + "\n", + "# String length and basic properties\n", + "df_basic['name_length'] = df_basic['customer_name'].str.len()\n", + "df_basic['email_length'] = df_basic['email'].str.len()\n", + "df_basic['review_length'] = df_basic['product_reviews'].str.len()\n", + "\n", + "print(\"\\nString lengths:\")\n", + "print(df_basic[['customer_name', 'name_length', 'email', 'email_length']].head())\n", + "\n", + "print(\"\\nLength statistics:\")\n", + "length_stats = df_basic[['name_length', 'email_length', 'review_length']].describe()\n", + "print(length_stats)\n", + "\n", + "# Check for empty/null strings\n", + "print(\"\\nEmpty string checks:\")\n", + "for col in ['customer_name', 'email', 'phone']:\n", + " empty_count = (df_basic[col].str.strip() == '').sum()\n", + " null_count = df_basic[col].isnull().sum()\n", + " print(f\"{col}: {empty_count} empty strings, {null_count} null values\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# String slicing and indexing\n", + "print(\"=== STRING SLICING AND INDEXING ===\")\n", + "\n", + "# Extract parts 
of strings\n", + "df_basic['first_char'] = df_basic['customer_name'].str[0]\n", + "df_basic['last_char'] = df_basic['customer_name'].str[-1]\n", + "df_basic['first_three'] = df_basic['customer_name'].str[:3]\n", + "df_basic['last_three'] = df_basic['customer_name'].str[-3:]\n", + "df_basic['middle_chars'] = df_basic['customer_name'].str[2:5]\n", + "\n", + "print(\"String slicing examples:\")\n", + "slice_cols = ['customer_name', 'first_char', 'last_char', 'first_three', 'last_three', 'middle_chars']\n", + "print(df_basic[slice_cols].head(10))\n", + "\n", + "# Extract email domains\n", + "df_basic['email_domain'] = df_basic['email'].str.split('@').str[1]\n", + "df_basic['email_username'] = df_basic['email'].str.split('@').str[0]\n", + "\n", + "print(\"\\nEmail parsing:\")\n", + "print(df_basic[['email', 'email_username', 'email_domain']].head(10))\n", + "\n", + "# Domain analysis\n", + "print(\"\\nEmail domain distribution:\")\n", + "domain_counts = df_basic['email_domain'].value_counts()\n", + "print(domain_counts.head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# String concatenation and joining\n", + "print(\"=== STRING CONCATENATION AND JOINING ===\")\n", + "\n", + "# Simple concatenation\n", + "df_basic['name_email'] = df_basic['customer_name'] + ' - ' + df_basic['email']\n", + "df_basic['initials'] = df_basic['customer_name'].str[0] + '.' 
+ df_basic['customer_name'].str.split().str[1].str[0] + '.'\n", + "\n", + "print(\"String concatenation:\")\n", + "print(df_basic[['customer_name', 'email', 'name_email', 'initials']].head())\n", + "\n", + "# Using str.cat() for more complex joining\n", + "df_basic['full_contact'] = df_basic['customer_name'].str.cat(\n", + " [df_basic['email'], df_basic['phone']], \n", + " sep=' | '\n", + ")\n", + "\n", + "print(\"\\nComplex concatenation:\")\n", + "print(df_basic[['full_contact']].head())\n", + "\n", + "# Conditional concatenation\n", + "df_basic['display_name'] = df_basic.apply(\n", + " lambda row: f\"{row['customer_name']} ({row['job_title']})\" \n", + " if pd.notna(row['job_title']) else row['customer_name'], \n", + " axis=1\n", + ")\n", + "\n", + "print(\"\\nConditional concatenation:\")\n", + "print(df_basic[['customer_name', 'job_title', 'display_name']].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Pattern Matching and String Contains\n", + "\n", + "Finding patterns and filtering based on string content." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic pattern matching\n", + "print(\"=== BASIC PATTERN MATCHING ===\")\n", + "\n", + "# Check if strings contain specific patterns\n", + "df_patterns = df_text.copy()\n", + "\n", + "# Contains operations\n", + "df_patterns['has_dr_title'] = df_patterns['customer_name'].str.contains('Dr\\.|Prof\\.', case=False, na=False)\n", + "df_patterns['has_jr_sr'] = df_patterns['customer_name'].str.contains('Jr\\.|Sr\\.', case=False, na=False)\n", + "df_patterns['has_hyphen'] = df_patterns['customer_name'].str.contains('-', na=False)\n", + "df_patterns['has_apostrophe'] = df_patterns['customer_name'].str.contains(\"'\", na=False)\n", + "\n", + "print(\"Pattern matching results:\")\n", + "pattern_summary = df_patterns[['has_dr_title', 'has_jr_sr', 'has_hyphen', 'has_apostrophe']].sum()\n", + "print(pattern_summary)\n", + "\n", + "print(\"\\nExamples of names with titles:\")\n", + "title_names = df_patterns[df_patterns['has_dr_title']]['customer_name'].unique()\n", + "print(title_names)\n", + "\n", + "# Email domain patterns\n", + "df_patterns['edu_email'] = df_patterns['email'].str.contains('\\.edu', case=False, na=False)\n", + "df_patterns['com_email'] = df_patterns['email'].str.contains('\\.com', case=False, na=False)\n", + "df_patterns['org_email'] = df_patterns['email'].str.contains('\\.org', case=False, na=False)\n", + "\n", + "print(\"\\nEmail domain patterns:\")\n", + "domain_pattern_summary = df_patterns[['edu_email', 'com_email', 'org_email']].sum()\n", + "print(domain_pattern_summary)\n", + "\n", + "# Review sentiment patterns\n", + "df_patterns['positive_review'] = df_patterns['product_reviews'].str.contains(\n", + " 'great|excellent|amazing|fantastic|love|perfect|outstanding|superb|incredible', \n", + " case=False, na=False\n", + ")\n", + "df_patterns['negative_review'] = df_patterns['product_reviews'].str.contains(\n", + " 
'terrible|poor|disappointing|waste|cheap|broke|fake|mediocre', \n", + " case=False, na=False\n", + ")\n", + "\n", + "print(\"\\nReview sentiment patterns:\")\n", + "sentiment_summary = df_patterns[['positive_review', 'negative_review']].sum()\n", + "print(sentiment_summary)\n", + "\n", + "print(\"\\nSample positive reviews:\")\n", + "positive_reviews = df_patterns[df_patterns['positive_review']]['product_reviews'].head(5)\n", + "for review in positive_reviews:\n", + " print(f\"- {review}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced pattern matching with startswith/endswith\n", + "print(\"=== STARTSWITH/ENDSWITH PATTERNS ===\")\n", + "\n", + "# Check beginnings and endings\n", + "df_patterns['starts_with_vowel'] = df_patterns['customer_name'].str.lower().str.startswith(('a', 'e', 'i', 'o', 'u'))\n", + "df_patterns['ends_with_son'] = df_patterns['customer_name'].str.lower().str.endswith('son')\n", + "df_patterns['job_starts_data'] = df_patterns['job_title'].str.lower().str.startswith('data')\n", + "df_patterns['company_ends_inc'] = df_patterns['company'].str.lower().str.endswith(('inc', 'inc.', 'llc', 'ltd', 'co', 'co.'))\n", + "\n", + "print(\"Start/End pattern results:\")\n", + "start_end_summary = df_patterns[['starts_with_vowel', 'ends_with_son', 'job_starts_data', 'company_ends_inc']].sum()\n", + "print(start_end_summary)\n", + "\n", + "print(\"\\nNames starting with vowels:\")\n", + "vowel_names = df_patterns[df_patterns['starts_with_vowel']]['customer_name'].unique()[:10]\n", + "print(vowel_names)\n", + "\n", + "print(\"\\nData-related job titles:\")\n", + "data_jobs = df_patterns[df_patterns['job_starts_data']]['job_title'].unique()\n", + "print(data_jobs)\n", + "\n", + "# Phone number format detection\n", + "df_patterns['phone_parentheses'] = df_patterns['phone'].str.contains(r'\\(\\d{3}\\)', na=False)\n", + "df_patterns['phone_dashes'] = 
df_patterns['phone'].str.contains(r'\\d{3}-\\d{3}-\\d{4}', na=False)\n", + "df_patterns['phone_dots'] = df_patterns['phone'].str.contains(r'\\d{3}\\.\\d{3}\\.\\d{4}', na=False)\n", + "df_patterns['phone_spaces'] = df_patterns['phone'].str.contains(r'\\d{3}\\s\\d{3}\\s\\d{4}', na=False)\n", + "\n", + "print(\"\\nPhone number format patterns:\")\n", + "phone_format_summary = df_patterns[['phone_parentheses', 'phone_dashes', 'phone_dots', 'phone_spaces']].sum()\n", + "print(phone_format_summary)\n", + "\n", + "print(\"\\nSample phone formats:\")\n", + "for format_type in ['phone_parentheses', 'phone_dashes', 'phone_dots', 'phone_spaces']:\n", + " sample = df_patterns[df_patterns[format_type]]['phone'].iloc[0] if df_patterns[format_type].any() else 'None'\n", + " print(f\"{format_type}: {sample}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Regular Expressions\n", + "\n", + "Advanced pattern matching using regular expressions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Regular expression basics\n", + "print(\"=== REGULAR EXPRESSION BASICS ===\")\n", + "\n", + "df_regex = df_text.copy()\n", + "\n", + "# Extract patterns using regex\n", + "# Extract phone numbers (various formats)\n", + "phone_pattern = r'\\(?\\d{3}\\)?[-.\\s]?\\d{3}[-.\\s]?\\d{4}'\n", + "df_regex['extracted_phone'] = df_regex['phone'].str.extract(f'({phone_pattern})')\n", + "\n", + "print(\"Phone number extraction:\")\n", + "print(df_regex[['phone', 'extracted_phone']].head(10))\n", + "\n", + "# Extract ZIP codes from addresses\n", + "zip_pattern = r'\\b\\d{5}\\b'\n", + "df_regex['zip_code'] = df_regex['address'].str.extract(f'({zip_pattern})')\n", + "\n", + "print(\"\\nZIP code extraction:\")\n", + "print(df_regex[['address', 'zip_code']].head(10))\n", + "\n", + "# Extract state abbreviations\n", + "state_pattern = r'\\b[A-Z]{2}\\b'\n", + "df_regex['state'] = 
df_regex['address'].str.extract(f'({state_pattern})')\n", + "\n", + "print(\"\\nState extraction:\")\n", + "print(df_regex[['address', 'state']].head(10))\n", + "\n", + "# Count digits in strings\n", + "df_regex['digit_count'] = df_regex['phone'].str.count(r'\\d')\n", + "df_regex['letter_count'] = df_regex['customer_name'].str.count(r'[a-zA-Z]')\n", + "\n", + "print(\"\\nCharacter counting:\")\n", + "print(df_regex[['phone', 'digit_count', 'customer_name', 'letter_count']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced regex patterns\n", + "print(\"=== ADVANCED REGEX PATTERNS ===\")\n", + "\n", + "# Extract all email components\n", + "email_pattern = r'([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+)\\.([a-zA-Z]{2,})'\n", + "email_parts = df_regex['email'].str.extract(email_pattern)\n", + "email_parts.columns = ['username', 'domain', 'tld']\n", + "\n", + "df_regex = pd.concat([df_regex, email_parts], axis=1)\n", + "\n", + "print(\"Email component extraction:\")\n", + "print(df_regex[['email', 'username', 'domain', 'tld']].head(10))\n", + "\n", + "# Extract multiple phone number parts\n", + "phone_parts_pattern = r'\\(??(\\d{3})\\)?[-.,\\s]?(\\d{3})[-.,\\s]?(\\d{4})'\n", + "phone_parts = df_regex['phone'].str.extract(phone_parts_pattern)\n", + "phone_parts.columns = ['area_code', 'exchange', 'number']\n", + "\n", + "df_regex = pd.concat([df_regex, phone_parts], axis=1)\n", + "\n", + "print(\"\\nPhone number component extraction:\")\n", + "print(df_regex[['phone', 'area_code', 'exchange', 'number']].head(10))\n", + "\n", + "# Extract address components\n", + "address_pattern = r'(\\d+)\\s+(.+?)\\s*,\\s*(.+?)\\s*,\\s*([A-Z]{2})\\s+(\\d{5})'\n", + "address_parts = df_regex['address'].str.extract(address_pattern)\n", + "address_parts.columns = ['street_number', 'street_name', 'city', 'state_extracted', 'zip_extracted']\n", + "\n", + "print(\"\\nAddress component extraction (first 5):\")\n", + 
"print(address_parts.head())\n", + "\n", + "# Find all matches (not just first)\n", + "# Find all capitalized words in names\n", + "df_regex['capitalized_words'] = df_regex['customer_name'].str.findall(r'\\b[A-Z][a-z]+\\b')\n", + "df_regex['num_capitalized'] = df_regex['capitalized_words'].str.len()\n", + "\n", + "print(\"\\nCapitalized words in names:\")\n", + "print(df_regex[['customer_name', 'capitalized_words', 'num_capitalized']].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Regex replacement and cleaning\n", + "print(\"=== REGEX REPLACEMENT AND CLEANING ===\")\n", + "\n", + "# Clean phone numbers to standard format\n", + "def clean_phone(phone_str):\n", + " \"\"\"Clean phone number to standard format\"\"\"\n", + " if pd.isna(phone_str):\n", + " return None\n", + " # Remove all non-digits\n", + " digits = re.sub(r'\\D', '', phone_str)\n", + " # Handle different lengths\n", + " if len(digits) == 10:\n", + " return f\"({digits[:3]}) {digits[3:6]}-{digits[6:]}\"\n", + " elif len(digits) == 11 and digits.startswith('1'):\n", + " return f\"({digits[1:4]}) {digits[4:7]}-{digits[7:]}\"\n", + " else:\n", + " return 'Invalid'\n", + "\n", + "df_regex['phone_cleaned'] = df_regex['phone'].apply(clean_phone)\n", + "\n", + "print(\"Phone number cleaning:\")\n", + "phone_cleaning_sample = df_regex[['phone', 'phone_cleaned']].head(15)\n", + "print(phone_cleaning_sample)\n", + "\n", + "# Remove punctuation from reviews\n", + "df_regex['review_no_punct'] = df_regex['product_reviews'].str.replace(r'[^\\w\\s]', ' ', regex=True)\n", + "df_regex['review_clean'] = df_regex['review_no_punct'].str.replace(r'\\s+', ' ', regex=True).str.strip()\n", + "\n", + "print(\"\\nReview cleaning:\")\n", + "review_cleaning_sample = df_regex[['product_reviews', 'review_clean']].head(5)\n", + "for idx, row in review_cleaning_sample.iterrows():\n", + " print(f\"Original: {row['product_reviews']}\")\n", + " 
print(f\"Cleaned: {row['review_clean']}\")\n", + " print()\n", + "\n", + "# Standardize company names\n", + "df_regex['company_clean'] = (\n", + " df_regex['company']\n", + " .str.title() # Title-case first so the upper-case 'LLC' inserted below is not lowered to 'Llc'\n", + " .str.replace(r'\\binc\\b\\.?', 'Inc.', case=False, regex=True) # word boundary sits before the optional dot so an existing '.' is consumed instead of doubled ('Inc..')\n", + " .str.replace(r'\\bllc\\b', 'LLC', case=False, regex=True)\n", + " .str.replace(r'\\bltd\\b\\.?', 'Ltd.', case=False, regex=True)\n", + " .str.replace(r'\\bco\\b\\.?', 'Co.', case=False, regex=True)\n", + ")\n", + "\n", + "print(\"Company name standardization:\")\n", + "company_sample = df_regex[['company', 'company_clean']].head(10)\n", + "print(company_sample)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Text Cleaning and Standardization\n", + "\n", + "Comprehensive text cleaning workflows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Comprehensive text cleaning pipeline\n", + "print(\"=== COMPREHENSIVE TEXT CLEANING ===\")\n", + "\n", + "def clean_text_comprehensive(df):\n", + " \"\"\"Comprehensive text cleaning pipeline\"\"\"\n", + " df_clean = df.copy()\n", + " \n", + " # 1. Clean customer names\n", + " df_clean['customer_name_clean'] = (\n", + " df_clean['customer_name']\n", + " .str.strip() # Remove leading/trailing whitespace\n", + " .str.replace(r'\\s+', ' ', regex=True) # Replace multiple spaces with single space\n", + " .str.title() # Title case\n", + " .str.replace(r'\\bDr\\.', 'Dr.', regex=True) # Standardize titles\n", + " .str.replace(r'\\bProf\\.', 'Prof.', regex=True)\n", + " .str.replace(r'\\bJr\\.', 'Jr.', regex=True)\n", + " .str.replace(r'\\bSr\\.', 'Sr.', regex=True)\n", + " )\n", + " \n", + " # 2. Clean and standardize emails\n", + " df_clean['email_clean'] = (\n", + " df_clean['email']\n", + " .str.strip()\n", + " .str.lower() # Lowercase for emails\n", + " .str.replace(r'\\s+', '', regex=True) # Remove any spaces\n", + " )\n", + " \n", + " # 3. 
Standardize phone numbers\n", + " df_clean['phone_clean'] = df_clean['phone'].apply(clean_phone)\n", + " \n", + " # 4. Clean addresses\n", + " df_clean['address_clean'] = (\n", + " df_clean['address']\n", + " .str.strip()\n", + " .str.title() # Title case\n", + " .str.replace(r'\\bSt\\b\\.?', 'St.', regex=True) # Standardize street abbreviations; boundary precedes the optional dot so 'St.' is not turned into 'St..'\n", + " .str.replace(r'\\bAve\\b\\.?', 'Ave.', regex=True)\n", + " .str.replace(r'\\bRd\\b\\.?', 'Rd.', regex=True)\n", + " .str.replace(r'\\bDr\\b\\.?', 'Dr.', regex=True)\n", + " .str.replace(r'\\bLn\\b\\.?', 'Ln.', regex=True)\n", + " .str.replace(r'\\bCt\\b\\.?', 'Ct.', regex=True)\n", + " .str.replace(r'\\bBlvd\\b\\.?', 'Blvd.', regex=True)\n", + " .str.replace(r'\\s+', ' ', regex=True).str.replace(r'\\b([A-Za-z]{2})(?=\\s+\\d{5}\\b)', lambda m: m.group(1).upper(), regex=True) # Multiple spaces to single, then restore the 2-letter state code that title() lowered ('Ca' -> 'CA')\n", + " )\n", + " \n", + " # 5. Clean job titles\n", + " df_clean['job_title_clean'] = (\n", + " df_clean['job_title']\n", + " .str.strip()\n", + " .str.title()\n", + " .str.replace(r'\\bQa\\b', 'QA', regex=True) # Specific corrections\n", + " .str.replace(r'\\bUx\\b', 'UX', regex=True)\n", + " .str.replace(r'\\bHr\\b', 'HR', regex=True)\n", + " )\n", + " \n", + " # 6. 
Clean company names\n", + " df_clean['company_clean'] = (\n", + " df_clean['company']\n", + " .str.strip()\n", + " .str.title()\n", + " .str.replace(r'\\binc\\b\\.?', 'Inc.', case=False, regex=True) # boundary precedes the optional dot so an existing '.' is consumed instead of doubled ('Inc..')\n", + " .str.replace(r'\\bllc\\b', 'LLC', case=False, regex=True)\n", + " .str.replace(r'\\bltd\\b\\.?', 'Ltd.', case=False, regex=True)\n", + " .str.replace(r'\\bco\\b\\.?', 'Co.', case=False, regex=True)\n", + " )\n", + " \n", + " return df_clean\n", + "\n", + "# Apply comprehensive cleaning\n", + "df_comprehensive = clean_text_comprehensive(df_text)\n", + "\n", + "print(\"Comprehensive cleaning results:\")\n", + "# Show before/after comparison\n", + "comparison_cols = [\n", + " ('customer_name', 'customer_name_clean'),\n", + " ('email', 'email_clean'),\n", + " ('phone', 'phone_clean'),\n", + " ('job_title', 'job_title_clean'),\n", + " ('company', 'company_clean')\n", + "]\n", + "\n", + "for original, cleaned in comparison_cols:\n", + " print(f\"\\n{original.upper()} CLEANING:\")\n", + " sample = df_comprehensive[[original, cleaned]].head(5)\n", + " for idx, row in sample.iterrows():\n", + " print(f\" Before: {row[original]}\")\n", + " print(f\" After: {row[cleaned]}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Text standardization and validation\n", + "print(\"=== TEXT STANDARDIZATION AND VALIDATION ===\")\n", + "\n", + "def validate_cleaned_data(df):\n", + " \"\"\"Validate cleaned data quality\"\"\"\n", + " validation_results = {}\n", + " \n", + " # Email validation\n", + " email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'\n", + " valid_emails = df['email_clean'].str.match(email_pattern, na=False)\n", + " validation_results['valid_emails'] = {\n", + " 'total': len(df),\n", + " 'valid': valid_emails.sum(),\n", + " 'invalid': (~valid_emails).sum(),\n", + " 'percentage_valid': (valid_emails.sum() / len(df)) * 100\n", + " }\n", + " \n", + " # Phone 
validation\n", + " valid_phones = df['phone_clean'] != 'Invalid'\n", + " validation_results['valid_phones'] = {\n", + " 'total': len(df),\n", + " 'valid': valid_phones.sum(),\n", + " 'invalid': (~valid_phones).sum(),\n", + " 'percentage_valid': (valid_phones.sum() / len(df)) * 100\n", + " }\n", + " \n", + " # Name validation (no numbers, reasonable length)\n", + " valid_names = (\n", + " df['customer_name_clean'].str.len().between(2, 50) &\n", + " ~df['customer_name_clean'].str.contains(r'\\d', na=False)\n", + " )\n", + " validation_results['valid_names'] = {\n", + " 'total': len(df),\n", + " 'valid': valid_names.sum(),\n", + " 'invalid': (~valid_names).sum(),\n", + " 'percentage_valid': (valid_names.sum() / len(df)) * 100\n", + " }\n", + " \n", + " return validation_results\n", + "\n", + "# Validate cleaned data\n", + "validation_results = validate_cleaned_data(df_comprehensive)\n", + "\n", + "print(\"Data validation results:\")\n", + "for field, results in validation_results.items():\n", + " print(f\"\\n{field.upper()}:\")\n", + " print(f\" Total records: {results['total']}\")\n", + " print(f\" Valid: {results['valid']} ({results['percentage_valid']:.1f}%)\")\n", + " print(f\" Invalid: {results['invalid']}\")\n", + "\n", + "# Show some invalid examples\n", + "print(\"\\nExamples of invalid data:\")\n", + "invalid_emails = df_comprehensive[~df_comprehensive['email_clean'].str.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$', na=False)]\n", + "if len(invalid_emails) > 0:\n", + " print(f\"Invalid emails: {invalid_emails['email_clean'].head(3).tolist()}\")\n", + "\n", + "invalid_phones = df_comprehensive[df_comprehensive['phone_clean'] == 'Invalid']\n", + "if len(invalid_phones) > 0:\n", + " print(f\"Invalid phones: {invalid_phones['phone'].head(3).tolist()}\")\n", + "\n", + "# Generate data quality summary\n", + "overall_quality = np.mean([results['percentage_valid'] for results in validation_results.values()])\n", + "print(f\"\\nOverall data quality 
score: {overall_quality:.1f}%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Text Analysis and Insights\n", + "\n", + "Extracting business insights from text data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Text analysis for business insights\n", + "print(\"=== TEXT ANALYSIS FOR BUSINESS INSIGHTS ===\")\n", + "\n", + "def analyze_text_patterns(df):\n", + " \"\"\"Analyze text patterns for business insights\"\"\"\n", + " analysis = {}\n", + " \n", + " # 1. Name analysis\n", + " analysis['name_insights'] = {\n", + " 'avg_name_length': df['customer_name_clean'].str.len().mean(),\n", + " 'names_with_titles': df['customer_name_clean'].str.contains(r'Dr\\.|Prof\\.|Mr\\.|Ms\\.|Mrs\\.').sum(),\n", + " 'names_with_suffixes': df['customer_name_clean'].str.contains(r'Jr\\.|Sr\\.|III|II').sum(),\n", + " 'hyphenated_names': df['customer_name_clean'].str.contains('-').sum(),\n", + " 'most_common_first_names': df['customer_name_clean'].str.split().str[0].value_counts().head(5)\n", + " }\n", + " \n", + " # 2. Email domain analysis\n", + " domains = df['email_clean'].str.split('@').str[1]\n", + " analysis['email_insights'] = {\n", + " 'total_unique_domains': domains.nunique(),\n", + " 'top_domains': domains.value_counts().head(10),\n", + " 'edu_domains': domains.str.endswith('.edu').sum(),\n", + " 'com_domains': domains.str.endswith('.com').sum(),\n", + " 'org_domains': domains.str.endswith('.org').sum()\n", + " }\n", + " \n", + " # 3. Geographic analysis from addresses\n", + " states = df['address_clean'].str.extract(r'\\b([A-Z]{2})\\s+\\d{5}')[0]\n", + " analysis['geographic_insights'] = {\n", + " 'unique_states': states.nunique(),\n", + " 'top_states': states.value_counts().head(10),\n", + " 'coastal_states': states.isin(['CA', 'NY', 'FL', 'WA', 'OR']).sum()\n", + " }\n", + " \n", + " # 4. 
Job title analysis\n", + " analysis['job_insights'] = {\n", + " 'unique_job_titles': df['job_title_clean'].nunique(),\n", + " 'top_job_titles': df['job_title_clean'].value_counts().head(10),\n", + " 'tech_jobs': df['job_title_clean'].str.contains('Engineer|Developer|Data|Software', case=False).sum(),\n", + " 'management_jobs': df['job_title_clean'].str.contains('Manager|Director|VP|President', case=False).sum()\n", + " }\n", + " \n", + " # 5. Company analysis\n", + " analysis['company_insights'] = {\n", + " 'unique_companies': df['company_clean'].nunique(),\n", + " 'top_companies': df['company_clean'].value_counts().head(10),\n", + " 'inc_companies': df['company_clean'].str.contains('Inc\\.').sum(),\n", + " 'llc_companies': df['company_clean'].str.contains('LLC').sum(),\n", + " 'startups': df['company_clean'].str.contains('Startup|startup', case=False).sum()\n", + " }\n", + " \n", + " return analysis\n", + "\n", + "# Perform text analysis\n", + "text_analysis = analyze_text_patterns(df_comprehensive)\n", + "\n", + "print(\"TEXT ANALYSIS RESULTS:\")\n", + "\n", + "print(\"\\n1. NAME INSIGHTS:\")\n", + "name_insights = text_analysis['name_insights']\n", + "print(f\" Average name length: {name_insights['avg_name_length']:.1f} characters\")\n", + "print(f\" Names with titles: {name_insights['names_with_titles']}\")\n", + "print(f\" Names with suffixes: {name_insights['names_with_suffixes']}\")\n", + "print(f\" Hyphenated names: {name_insights['hyphenated_names']}\")\n", + "print(\" Most common first names:\")\n", + "for name, count in name_insights['most_common_first_names'].items():\n", + " print(f\" {name}: {count}\")\n", + "\n", + "print(\"\\n2. 
EMAIL INSIGHTS:\")\n", + "email_insights = text_analysis['email_insights']\n", + "print(f\" Unique domains: {email_insights['total_unique_domains']}\")\n", + "print(f\" .edu domains: {email_insights['edu_domains']}\")\n", + "print(f\" .com domains: {email_insights['com_domains']}\")\n", + "print(f\" .org domains: {email_insights['org_domains']}\")\n", + "print(\" Top domains:\")\n", + "for domain, count in email_insights['top_domains'].head(5).items():\n", + " print(f\" {domain}: {count}\")\n", + "\n", + "print(\"\\n3. GEOGRAPHIC INSIGHTS:\")\n", + "geo_insights = text_analysis['geographic_insights']\n", + "print(f\" Unique states: {geo_insights['unique_states']}\")\n", + "print(f\" Coastal states: {geo_insights['coastal_states']}\")\n", + "print(\" Top states:\")\n", + "for state, count in geo_insights['top_states'].head(5).items():\n", + " print(f\" {state}: {count}\")\n", + "\n", + "print(\"\\n4. JOB INSIGHTS:\")\n", + "job_insights = text_analysis['job_insights']\n", + "print(f\" Unique job titles: {job_insights['unique_job_titles']}\")\n", + "print(f\" Tech jobs: {job_insights['tech_jobs']}\")\n", + "print(f\" Management jobs: {job_insights['management_jobs']}\")\n", + "\n", + "print(\"\\n5. COMPANY INSIGHTS:\")\n", + "company_insights = text_analysis['company_insights']\n", + "print(f\" Unique companies: {company_insights['unique_companies']}\")\n", + "print(f\" Inc. 
companies: {company_insights['inc_companies']}\")\n", + "print(f\" LLC companies: {company_insights['llc_companies']}\")\n", + "print(f\" Startups: {company_insights['startups']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Sentiment analysis of product reviews\n", + "print(\"=== SENTIMENT ANALYSIS OF REVIEWS ===\")\n", + "\n", + "def analyze_review_sentiment(df):\n", + " \"\"\"Analyze sentiment in product reviews\"\"\"\n", + " # Define sentiment word lists\n", + " positive_words = [\n", + " 'great', 'excellent', 'amazing', 'fantastic', 'love', 'perfect', \n", + " 'outstanding', 'superb', 'incredible', 'wonderful', 'awesome', \n", + " 'brilliant', 'impressive', 'remarkable', 'exceptional'\n", + " ]\n", + " \n", + " negative_words = [\n", + " 'terrible', 'poor', 'disappointing', 'waste', 'cheap', 'broke', \n", + " 'fake', 'mediocre', 'awful', 'horrible', 'useless', 'worst', \n", + " 'defective', 'junk', 'garbage'\n", + " ]\n", + " \n", + " # Create patterns\n", + " positive_pattern = '|'.join(positive_words)\n", + " negative_pattern = '|'.join(negative_words)\n", + " \n", + " # Count sentiment words\n", + " df['positive_word_count'] = df['product_reviews'].str.lower().str.count(positive_pattern)\n", + " df['negative_word_count'] = df['product_reviews'].str.lower().str.count(negative_pattern)\n", + " \n", + " # Calculate sentiment score\n", + " df['sentiment_score'] = df['positive_word_count'] - df['negative_word_count']\n", + " \n", + " # Categorize sentiment\n", + " def categorize_sentiment(score):\n", + " if score > 0:\n", + " return 'Positive'\n", + " elif score < 0:\n", + " return 'Negative'\n", + " else:\n", + " return 'Neutral'\n", + " \n", + " df['sentiment_category'] = df['sentiment_score'].apply(categorize_sentiment)\n", + " \n", + " # Additional features\n", + " df['has_exclamation'] = df['product_reviews'].str.contains('!').astype(int)\n", + " df['has_caps'] = 
df['product_reviews'].str.contains(r'[A-Z]{3,}').astype(int)\n", + " df['review_word_count'] = df['product_reviews'].str.split().str.len()\n", + " \n", + " return df\n", + "\n", + "# Analyze sentiment\n", + "df_sentiment = analyze_review_sentiment(df_comprehensive.copy())\n", + "\n", + "print(\"Sentiment analysis results:\")\n", + "sentiment_summary = df_sentiment['sentiment_category'].value_counts()\n", + "print(sentiment_summary)\n", + "print(f\"\\nSentiment distribution:\")\n", + "for category, count in sentiment_summary.items():\n", + " percentage = (count / len(df_sentiment)) * 100\n", + " print(f\" {category}: {count} ({percentage:.1f}%)\")\n", + "\n", + "print(\"\\nSentiment score statistics:\")\n", + "print(df_sentiment['sentiment_score'].describe())\n", + "\n", + "print(\"\\nSample reviews by sentiment:\")\n", + "for sentiment in ['Positive', 'Negative', 'Neutral']:\n", + " sample_reviews = df_sentiment[df_sentiment['sentiment_category'] == sentiment]['product_reviews'].head(2)\n", + " print(f\"\\n{sentiment} reviews:\")\n", + " for review in sample_reviews:\n", + " print(f\" - {review}\")\n", + "\n", + "# Correlation analysis\n", + "print(\"\\nCorrelation between text features:\")\n", + "text_features = ['positive_word_count', 'negative_word_count', 'sentiment_score', \n", + " 'has_exclamation', 'has_caps', 'review_word_count']\n", + "correlation_matrix = df_sentiment[text_features].corr()\n", + "print(correlation_matrix.round(3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply string operations to complex text processing scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Advanced Text Cleaning Pipeline\n", + "# Create a comprehensive text cleaning and validation system:\n", + "# - Handle international characters and encoding issues\n", + "# - Implement fuzzy matching for duplicate detection\n", + 
"# - Create data quality scoring system\n", + "# - Generate cleaning reports with statistics\n", + "\n", + "def advanced_text_cleaning_pipeline(df):\n", + " \"\"\"Advanced text cleaning with international support and validation\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# cleaned_df = advanced_text_cleaning_pipeline(df_text)\n", + "# print(\"Advanced text cleaning completed\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Text Mining and Information Extraction\n", + "# Extract structured information from unstructured text:\n", + "# - Extract entities (names, organizations, locations)\n", + "# - Parse complex address formats\n", + "# - Identify and extract contact information\n", + "# - Create knowledge graphs from text relationships\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Business Intelligence from Text\n", + "# Create business insights from text analysis:\n", + "# - Customer segmentation based on text patterns\n", + "# - Market analysis from company and job data\n", + "# - Geographic market penetration analysis\n", + "# - Competitive intelligence from text data\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **String Accessor (`.str`)**:\n", + " - Essential for all pandas string operations\n", + " - Works with Series containing strings\n", + " - Handles NaN values gracefully\n", + "\n", + "2. **Basic Operations**:\n", + " - **Case**: `.upper()`, `.lower()`, `.title()`, `.capitalize()`\n", + " - **Length**: `.len()`\n", + " - **Slicing**: `.str[start:end]`\n", + " - **Splitting**: `.str.split()`\n", + "\n", + "3. 
**Pattern Matching**:\n", + " - **Contains**: `.str.contains()` for pattern detection\n", + " - **Startswith/Endswith**: `.str.startswith()`, `.str.endswith()`\n", + " - **Regular Expressions**: Use `regex=True` parameter\n", + "\n", + "4. **Text Cleaning**:\n", + " - **Replace**: `.str.replace()` for substitution\n", + " - **Strip**: `.str.strip()` for whitespace removal\n", + " - **Extract**: `.str.extract()` for regex pattern extraction\n", + "\n", + "## String Operations Quick Reference\n", + "\n", + "```python\n", + "# Basic transformations\n", + "df['col'].str.upper() # Uppercase\n", + "df['col'].str.lower() # Lowercase\n", + "df['col'].str.title() # Title Case\n", + "df['col'].str.len() # String length\n", + "df['col'].str.strip() # Remove whitespace\n", + "\n", + "# Pattern matching\n", + "df['col'].str.contains('pattern') # Check if contains\n", + "df['col'].str.startswith('prefix') # Check if starts with\n", + "df['col'].str.endswith('suffix') # Check if ends with\n", + "\n", + "# Extraction and replacement\n", + "df['col'].str.extract(r'(\\d+)') # Extract pattern\n", + "df['col'].str.replace('old', 'new') # Replace text\n", + "df['col'].str.split('delimiter') # Split string\n", + "\n", + "# Advanced regex\n", + "df['col'].str.findall(r'\\b\\w+\\b') # Find all matches\n", + "df['col'].str.count(r'\\d') # Count pattern occurrences\n", + "```\n", + "\n", + "## Common Text Cleaning Patterns\n", + "\n", + "| Task | Pattern | Example |\n", + "|------|---------|----------|\n", + "| Remove punctuation | `r'[^\\w\\s]'` | `str.replace(r'[^\\w\\s]', '', regex=True)` |\n", + "| Extract digits | `r'\\d+'` | `str.extract(r'(\\d+)')` |\n", + "| Clean phone numbers | `r'\\D'` | `str.replace(r'\\D', '', regex=True)` |\n", + "| Extract email parts | `r'([^@]+)@(.+)'` | `str.extract(r'([^@]+)@(.+)')` |\n", + "| Standardize whitespace | `r'\\s+'` | `str.replace(r'\\s+', ' ', regex=True)` |\n", + "\n", + "## Best Practices\n", + "\n", + "1. 
**Data Validation**: Always validate cleaned data\n", + "2. **Preserve Originals**: Keep original columns during cleaning\n", + "3. **Handle Edge Cases**: Plan for missing values and unusual formats\n", + "4. **Performance**: Use vectorized operations instead of apply() when possible\n", + "5. **Documentation**: Document cleaning rules and business logic\n", + "6. **Testing**: Test regex patterns thoroughly with edge cases\n", + "\n", + "## Business Applications\n", + "\n", + "- **Customer Data Cleaning**: Standardize names, addresses, contacts\n", + "- **Market Research**: Analyze company names and domains\n", + "- **Sentiment Analysis**: Process customer reviews and feedback\n", + "- **Data Integration**: Clean and match data from multiple sources\n", + "- **Compliance**: Standardize data for regulatory requirements" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/12_data_visualization.ipynb b/Session_01/PandasDataFrame-exmples/12_data_visualization.ipynb new file mode 100755 index 0000000..49bf35e --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/12_data_visualization.ipynb @@ -0,0 +1,1126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 12: Data Visualization with Pandas\n", + "\n", + "## Learning Objectives\n", + "- Learn to create plots directly from pandas DataFrames\n", + "- Master different types of visualizations (line, bar, scatter, histogram, etc.)\n", + "- Understand customization options for pandas plots\n", + "- Practice with real-world visualization 
scenarios\n", + "- Learn when to use pandas vs matplotlib/seaborn\n", + "\n", + "## Prerequisites\n", + "- Completed previous lessons on DataFrames\n", + "- Basic understanding of data analysis concepts\n", + "- matplotlib will be used as the backend" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Libraries loaded successfully!\n" + ] + } + ], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from datetime import datetime, timedelta\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Set matplotlib style and register pandas date converters with matplotlib\n", + "plt.style.use('seaborn-v0_8')\n", + "pd.plotting.register_matplotlib_converters()\n", + "\n", + "# Display settings\n", + "%matplotlib inline\n", + "plt.rcParams['figure.figsize'] = (10, 6)\n", + "plt.rcParams['font.size'] = 12\n", + "\n", + "print(\"Libraries loaded successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Sample Datasets\n", + "\n", + "Let's create multiple datasets for different visualization examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample datasets created:\n", + "Sales data: (100, 5)\n", + "Stock data: (90, 4)\n", + "\n", + "Sales data preview:\n", + " Date Product Sales Region Salesperson\n", + "0 2024-01-01 Tablet 1147 South Charlie\n", + "1 2024-01-02 Monitor 1034 West Diana\n", + "2 2024-01-03 Laptop 976 North Charlie\n", + "3 2024-01-04 Tablet 939 West Alice\n", + "4 2024-01-05 Tablet 704 West Alice\n" + ] + } + ], + "source": [ + "# Sales dataset\n", + "np.random.seed(42)\n", + "dates = pd.date_range('2024-01-01', periods=100, freq='D')\n", + "sales_data = {\n", + " 'Date': dates,\n", + " 'Product': np.random.choice(['Laptop', 'Phone', 'Tablet', 'Monitor'], 100),\n", + " 'Sales': np.random.normal(1000, 200, 100).astype(int),\n", + " 'Region': np.random.choice(['North', 'South', 'East', 'West'], 100),\n", + " 'Salesperson': np.random.choice(['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'], 100)\n", + "}\n", + "df_sales = pd.DataFrame(sales_data)\n", + "df_sales['Sales'] = np.abs(df_sales['Sales']) # Ensure positive values\n", + "\n", + "# Stock prices dataset\n", + "stock_dates = pd.date_range('2024-01-01', periods=90, freq='D')\n", + "stock_data = {\n", + " 'Date': stock_dates,\n", + " 'AAPL': 150 + np.cumsum(np.random.normal(0, 2, 90)),\n", + " 'GOOGL': 120 + np.cumsum(np.random.normal(0, 1.5, 90)),\n", + " 'MSFT': 300 + np.cumsum(np.random.normal(0, 3, 90)),\n", + " 'TSLA': 200 + np.cumsum(np.random.normal(0, 5, 90))\n", + "}\n", + "df_stocks = pd.DataFrame(stock_data)\n", + "df_stocks.set_index('Date', inplace=True)\n", + "\n", + "print(\"Sample datasets created:\")\n", + "print(f\"Sales data: {df_sales.shape}\")\n", + "print(f\"Stock data: {df_stocks.shape}\")\n", + "print(\"\\nSales data preview:\")\n", + "print(df_sales.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
Basic Line Plots\n", + "\n", + "Line plots are perfect for time series data and showing trends." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Basic line plot of stock prices:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Single stock line plot:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Simple line plot\n", + "print(\"Basic line plot of stock prices:\")\n", + "df_stocks.plot()\n", + "plt.title('Stock Prices Over Time')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Price ($)')\n", + "plt.legend(title='Stock')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Single column line plot\n", + "print(\"\\nSingle stock line plot:\")\n", + "df_stocks['AAPL'].plot(color='blue', linewidth=2)\n", + "plt.title('Apple Stock Price')\n", + "plt.ylabel('Price ($)')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Daily sales with 7-day rolling average:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Daily sales trend\n", + "daily_sales = df_sales.groupby('Date')['Sales'].sum()\n", + "\n", + "daily_sales.plot(kind='line', marker='o', markersize=4)\n", + "plt.title('Daily Sales Trend')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Total Sales ($)')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Rolling average\n", + "print(\"\\nDaily sales with 7-day rolling average:\")\n", + "daily_sales.plot(label='Daily Sales', alpha=0.7)\n", + "daily_sales.rolling(window=7).mean().plot(label='7-day Average', linewidth=3)\n", + "plt.title('Daily Sales with Moving Average')\n", + "plt.xlabel('Date')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Bar Charts\n", + "\n", + "Bar charts are excellent for comparing categories." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Horizontal bar chart:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Product sales comparison\n", + "product_sales = df_sales.groupby('Product')['Sales'].sum()\n", + "\n", + "product_sales.plot(kind='bar', color=['skyblue', 'lightcoral', 'lightgreen', 'gold'])\n", + "plt.title('Total Sales by Product')\n", + "plt.xlabel('Product')\n", + "plt.ylabel('Total Sales ($)')\n", + "plt.grid(True, alpha=0.3, axis='y')\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Horizontal bar chart\n", + "print(\"\\nHorizontal bar chart:\")\n", + "product_sales.plot(kind='barh', color='lightblue')\n", + "plt.title('Total Sales by Product (Horizontal)')\n", + "plt.xlabel('Total Sales ($)')\n", + "plt.grid(True, alpha=0.3, axis='x')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Stacked bar chart:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Grouped bar chart\n", + "region_product = df_sales.groupby(['Region', 'Product'])['Sales'].sum().unstack()\n", + "\n", + "region_product.plot(kind='bar', figsize=(12, 6))\n", + "plt.title('Sales by Region and Product')\n", + "plt.xlabel('Region')\n", + "plt.ylabel('Total Sales ($)')\n", + "plt.legend(title='Product', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "plt.grid(True, alpha=0.3, axis='y')\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Stacked bar chart\n", + "print(\"\\nStacked bar chart:\")\n", + "region_product.plot(kind='bar', stacked=True, figsize=(10, 6))\n", + "plt.title('Stacked Sales by Region and Product')\n", + "plt.xlabel('Region')\n", + "plt.ylabel('Total Sales ($)')\n", + "plt.legend(title='Product')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Histograms and Distribution Plots\n", + "\n", + "Understand the distribution of your numerical data." + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Histograms by product:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Basic histogram\n", + "df_sales['Sales'].plot(kind='hist', bins=20, alpha=0.7, color='skyblue', edgecolor='black')\n", + "plt.title('Distribution of Sales Values')\n", + "plt.xlabel('Sales ($)')\n", + "plt.ylabel('Frequency')\n", + "plt.grid(True, alpha=0.3, axis='y')\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Multiple histograms\n", + "print(\"\\nHistograms by product:\")\n", + "fig, axes = plt.subplots(2, 2, figsize=(12, 8))\n", + "products = df_sales['Product'].unique()\n", + "\n", + "for i, product in enumerate(products):\n", + " ax = axes[i//2, i%2]\n", + " product_data = df_sales[df_sales['Product'] == product]['Sales']\n", + " product_data.plot(kind='hist', bins=15, alpha=0.7, ax=ax, title=f'{product} Sales Distribution')\n", + " ax.set_xlabel('Sales ($)')\n", + " ax.set_ylabel('Frequency')\n", + " ax.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Combined histogram and density plot:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Density plot (KDE)\n", + "df_sales['Sales'].plot(kind='density', alpha=0.7, color='red')\n", + "plt.title('Sales Distribution (Kernel Density Estimate)')\n", + "plt.xlabel('Sales ($)')\n", + "plt.ylabel('Density')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Combined histogram and density\n", + "print(\"\\nCombined histogram and density plot:\")\n", + "ax = df_sales['Sales'].plot(kind='hist', bins=20, alpha=0.5, color='skyblue', \n", + " density=True, label='Histogram')\n", + "df_sales['Sales'].plot(kind='density', ax=ax, color='red', linewidth=2, label='Density')\n", + "plt.title('Sales Distribution: Histogram + Density')\n", + "plt.xlabel('Sales ($)')\n", + "plt.ylabel('Density')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Box Plots\n", + "\n", + "Box plots show distribution quartiles and outliers." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Box plots by region:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Basic box plot\n", + "df_sales.boxplot(column='Sales', by='Product', figsize=(10, 6))\n", + "plt.title('Sales Distribution by Product')\n", + "plt.suptitle('') # Remove default title\n", + "plt.xlabel('Product')\n", + "plt.ylabel('Sales ($)')\n", + "plt.xticks(rotation=45)\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Multiple box plots\n", + "print(\"\\nBox plots by region:\")\n", + "df_sales.boxplot(column='Sales', by='Region', figsize=(10, 6))\n", + "plt.title('Sales Distribution by Region')\n", + "plt.suptitle('')\n", + "plt.xlabel('Region')\n", + "plt.ylabel('Sales ($)')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA9gAAAJICAYAAACaO0yGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAT8VJREFUeJzt3QmYVXX9P/APDCKggoKIGgqKImqKuJuauWRumVtupZaWZC6Vpb/cNbdyq9RKLHLJ3XJJMy3LLDU0d8kNUBQXlEVFUARm+D+fY3f+MyPI4hnuLK/X89znzj3nzrnn3nO/M+d9vluH2bNnzw4AAADgE+n4yX4dAAAASAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAFok2bPnl3tXWgR+wAtmTICtDUCNkAr9/zzz8f3vve92HzzzePTn/50bLHFFvHd7343nn322QXe1g9/+MPYZpttmmU/m77OGmusUX8bNGhQrLfeevHFL34xLr744pg+fXqj5x9wwAHFbX498sgjceihh87zeRdddFHx+gv7OnMzY8aMOOuss+K2225b5J9tQy+++GKceuqpsd1228W6664bn/vc5+Loo49eqO9GS3XTTTcVx/CVV16p9q7Egw8+WOxL3pctvzsNy0ze1llnnfj85z8f559/fnzwwQfRXJqWk7L88pe/jOHDh5e+XYBq6lTVVwfgExk1alTss88+RTg98cQTo1evXjF+/Pi46qqrYu+9944rr7yyWNcS9e7duwjTqa6uLt599914+OGHY9iwYXHffffFFVdcEYsvvnix/pRTTlmgbd94440xZsyYeT7vy1/+cmy55ZZRtjfffLPY/7PPPrt+2be//e048MADY1H5y1/+Escee2ysvvrqcdhhh0Xfvn2L70buV343fvWrXxUXZVq7vGhw/fXXx3LLLRdt3VZbbVV8jyoyVGeYz6D66quvxgUXXBCtyc9//vM44ogjqr0bAKUSsAFascsuuyyWWWaZ+PWvfx2dOv3/P+lZY7nDDjsUJ96XXnpptESdO3f+SPjP
ADF48OA4/PDD47e//W0RDNNqq63WLPuw/PLLF7dFYeWVV45F5eWXX47/+7//Ky4e/OxnP4uampr6ddtvv33st99+xfq///3vxXFozXr27Fnc2oN8n03LzCabbFJcOMma/Gwl0R4uNAC0ZJqIA7RiEydOLPowZg1wQ926dYvjjz8+dtxxx/pltbW1RdjeZZddiubCeaK+7777xogRI+ZZG7zzzjsXzc+ztjCbi+a2KiZPnhzf//73i9rQbLL6pS99KW655ZaFfk95cSD37brrrptr0+3777+/qIUdMmRIbLTRRkUQr9RYZ8i4+eabixq9bNaawSObD+fPeUEiLzxkiP/DH/4w16avv/jFL+Izn/lMsf2sMRw3btzHNvWubL/yWttuu22x/Ljjjqt/btPfy8/w6quvLprFV5pvn3feeY2a+ubvfO1rXyv29Qtf+EJxDPLz/ec///mxn+Hvfve7opl6tmpoGK5T165di3C95557xjvvvFO//I477og99tijeM95LE8++eRG6/Ozys/ur3/9a/Edqhzrxx57LB5//PGiNUC+j1z373//u9Hv5fu+55576j/7PHZNm1Fns/Wszdx0001j7bXXLi4OnHHGGY26C+RnnK0ecj/ztfLnpk3E5+f7OHbs2DjqqKOK5+R3Lb9b2a2g6fH885//XDwvP5ONN964+Dzfe++9mJfRo0fH/vvvX9+EO49HRW7vs5/97EfK7AknnFAc44WR34v8O/D6668Xj/Pzzi4KBx10UPE55bYrLSvyO5kXsnL5XnvtFX/7298abSu/f9nyIj+bfN/5/KbNz+fUlWJOzeNfeOGF4pjmZ5fldOjQofXltFLu8hg2R/NzgGoRsAFasQxlr732WhGUM6zlyWtl0KAMM7vvvnv9czO8ZY12Nin/zW9+E6effnq8/fbb8Z3vfCfef//9OW4/m2ufdNJJsdlmm8Ull1wSX/nKV4ra8lxWccwxxxSve9pppxXr1lprrSLAzSu4f5w8uc9auQzJTWXYzdCboSKbOZ955plFX+Psc52hJddlgMgm6Nl0OD+jhmHvm9/8ZpxzzjlzbR6dQetPf/pTETAz4GXwy6bdU6dOna99zxrEStP3DP6Vn5vK7WeQyQsK+T7ys82m/bn/DQd+GjlyZNFPNYNZBv8MzEceeWSj8NvUv/71r+I49OnTZ47r83hmv/38jFJ+L7JvdobNCy+8sGhBcNdddxUhqmHAzWPy4x//OL71rW8VzXunTJlS7Ff+bgbs3L/c99x2w9/L0JvfiQyd+XtdunSJQw45JJ555pn64JfvP7+Huf38HuVFnQym2c2hofwe5kWJ3M85BdJ5fR8z/GZAzxCdgTnLRYcOHYow+tBDDzXaVnZN+NSnPlV8Prm/v//974tjNS95XPOzzOdWLhRk0/yUofaNN95oFETzs7rzzjsbldcFkd//tNJKK9Uvy78HGfBz3/M182Jc3mc3jDw+WRbyveWx/uMf/9jo87vhhhuKMJytH/J7dvnlly/wPuV7zL81eTEjxwE499xzi33Izzn/7mTZrHwelZ8B2gJNxAFasQwsEyZMKALYj370o2JZNhnPgc4yFGYtVUWGmDyxbljzlH2cM6w999xzH2l6mn2iK4E8g0jK7S699NLF469//etF/94MJXmSnkExZW1VPueTND1edtlli/s8Ic8Q0NCTTz5ZBJIMAJUAmc28syYuaxezKXY2pW3YBL1S65g1+llz+3EywGbz9ErT8VVXXTV22223ohb0q1/96jz3PV93zTXXLH7OfcmA11SGvAxrWdNaGYwtA3+G8+w3nTXUeZGgchyylrbSxDxbJ+R+ZGCcW41nBuHKPsxLBqgMglmrnKG/YuDAgUXozdrzvE8ZgDN0Zg1s5X3kAFt5kSODUuWzztCdoa+yD/l7GbLyc0xZS53fl2xR8dOf/rQYqC+fm+F7ySWXLJ6TLQiypUIG0YYD1m244YbFd6/iqaeeavR+5vV9zAse
+XMG98pr5UWYrHnPCy95XCryGGQ4r1yUyP35xz/+URy3j5OfZR7HSpnJsJkXq7Ls5eP8buX3KbeZslVAfm6Vz2du8uLFrFmz6h9PmjSp+K5ka4+ddtqpUVP5FVdcMX7wgx/UP86Amxc68sJJpUzl+8sWEvm+8/3nhYlcn8cquxGkvECQFzTyWC+IDOXZiiJbjVQu5ORghrndJ554ov77nZ9FSx0nAmBhqMEGaOWyBjprLDPoZMjJ0JCjV1cGOavI9Vl7lCfZWYuVwalSc5Unwk1l098MstncNE/qK7dKM+cMG5U+oFkblqEqm5NnKM5Qsv766y/0e6rU4GbNYlPZxDgvDOR7zWCX7z1P3PPiQSUwzc38hM7c74b9svN3smbwP//5T5SlUlOatbQN5eMM+A1rNzM0Ney/Xdm3ubU6SLmNhs34P042787jnwGroQyyGcSa1uo2PK6VCyF5TCoyzKas3a7I8QEabj9rsDOkVz7TDJ1Ze5/HNYNcXizJ0J/f1abfzXkdw3l9H/P9bL311o2+K7l/+dlna4Fp06bVL28a/PKzn58m4hl2G8pm4hmGs8l0x44di5rqHISucgyzS0NeUJjXeAAZyrP5fOWWn2GG4eyS0HQgwKafU77vbPLd9ILVrrvuWlyky33LvwupYVeG3N+FabqeLUHy86uE65TvL7sKVMI1QFukBhugDejRo0cRYCoh5umnny6aematVdY+Za121vRls9m8z364OXBY1nLNbS7abMaZ5jbdVdaIp6yBzGa72V81a7/yhDzDQtaoNz2Zn19Z45fm1MQ5R8POMJa1n1nbmBcRunfvXtTm5/RkcwrlFVn7Oy+V0NhQjs7eMDB+UpXm3Q3DRyXo5bHKWuuKPFYNVd5f0z68DeVxza4DczNz5sxiH/K9VvZlTu87lzXclzSnixhN93FO22k4CF/lM618x/K95AjY2aw5A+wKK6xQtL6ojCK/IMdwXt/Hyvue0z5mOWjYFaDp+8ptzc+8zU23n+81VT7rbEWR+5ghO2vzs896NlWfl7wwkLXzle9B7l++p7xgMa/PKV+7YRPypvua3+/K/uV3sKGm39P5kcc2yypAe6MGG6CVyhCaNX9ZS9dUNkvOGt2s/cs+yxkavvGNbxQn3dm/+NFHHy3C6cc1l87QmvLEP5/b9FZpar7UUksVYT5HpM5Qk/1xc/sZ5hfWAw88EP369ZtrH+LKAFdZ05tNUbN5dQaW7Mf6Sc2pb3PW8FWa32awaVo7PD+1mk0viFS22zT4vvXWWx8JOAsqvxd5kaXp9ivuvffe4jPLpsmVfcma3qby9z/pvqRKkG4oX68SPPNiSR7H7HqQtajZDDv7WC/M6ODz+j7m+53be01lvN+m36HK61XebwbdbLqe+5ff2bxoUWnS/nGydUD2q85bjkEwYMCAOYbrOcn3PafvQ8P3XXnvTT+fOR2/eZWBPA7ZAqGpvJjQcNBAgLZGwAZopSq1gtdcc81HRvlN2eQzawAzqObPeZKc/bKz5jpr4lJlNOo51YZms9/FFlusCPKVk/q85WtmbWMOEpWDkGVzz0qwzf7KOYhY1hh+XA3qx8lwlbXslT6gTWUQy5q8vHiQfWmzH2sO2JYqr1l5fwsjm7Y2rLXN/qL5PrOmMS2xxBJFCG74mTccgTo1Hbm7qQxXKS92NJSPM7hssMEG8Ulkn+k8dtmEfk5BKMNrhqlsYpzHOT/H22+/vdHzMujm5/lJmvpXZFeDbMrf8HF+9yp9kPPzy+9lXvDJYJbye5d9sz+upr6p+fk+5mjW2Uy5YU11fkb52ef3u4xpy/I73FBuO2vlsyxWZBeHvJCUn3s2KZ9TbX2Z8n1nt4+mAwdmN5Gsoc59q3zHm16oys+robwgkP38G2paBrKLQZadhiE7m8nnhb68wPNJyylAS6WJOEArlSEu+19mk9EMJhmqskYr+3Vm/+hsbpv9s7PmapVVVilOirOWNwNy3rL5bGVApzn1580A
lifDOfBUhpHs25qhJx9nLW72e84wlP0qc5TkfE72Fc5+rHkCnYOQfZwMyNn/N2Wz22yimqEum3zna81tQLEMAVmrnu87n5OfQw7ylMEog3el9j1r4XI/5newr4oMdNksPkfKziCdfddzwK/sq5ryNXJ065z6KENShsAcyKlhqK6ExKyty2PSsI9yyjCZ/XAz6OZnn+EnR9TOWvl87zmw1CeRTXPzu5H7mN+LHGU+A17Oj537mjWIOTBehrq85fvNEcAzlOf7y4sneZwr+1mGnO4pm/BnLW6+dgb9yjzn2SKhMmd79tt96aWXikHB8jvycX3Nm8rm0vP6Pua0URnu82JTvu98z9nlID+THF2/DPn9yAsx2ZIkw3VeXMiBxBp2X8h+zXlhKAftazgqf3PJgeEyTOegZvkZZG149unOwfJySq8Muxmyc1DDbGaf4y1k2bn11luLQRAbyu9IthDI0dKzv3aW26ZToeXr5LL8G5KffX7O2a8+j092W6mU02xdkH3xM5B/XPcOgNZCwAZoxXL045xSJwNLhuesLcqgmSf2eZK8/fbb1we+DDB5kp+hO0/+8+Q5g0XW8OUJctO5nVMGoqzdylryDB8Z1rPWMZvdVkJkhsKs0c5AloE0g1yewM+t73bDpql5Ml+RzdfzQkAOTpXNz/OEfE4y2Od7zUCY+5G1j9lcNkf+zhrLlNMwZajKEJ7bazro1MfJprrZhzmbGWfIyDCRQbVSw5hNq3PQrAxReZEiB5vKzyBDbEVezMhAk9MP5X5UBoRrKGuXM9DkYHM5nVSOIJ6hL6fpKqNmL4Nxbj+nh8rplrL2MI9l1kjnIGAZ/CtyJPlsEZHfh9znDF85zVse//nptz4/MvBnkMvvaO7DtddeW1+jmwEsvzt5cSWPa36Hcv7qDFwZtPPiS6XLwrzM6/uYI9/n9zmfk6E/XyMDfr52hrwyZMDP8pKfezYHz9dqOqBdfp/yYlG2Lmk42n9zyWOfn3leMMr9y+4IWZby70Jl3vaUg6VVvgvZ1D0v9uTFpnwvFXlBLy/W5OBseXErLxDlxaKGrU7yc8/POceByPnc8+9SXjzKv0uVbgm53Xz9/BuU87BXxoQAaM06zJ6f0ToAABZChvkMvU1rQdu7bCafzdnzgkqO7g9A26AGGwBgEck+0Fnzm/2vs/Z8XvOyA9C6CNgAAItINv+v9NHO5tLzmrsdgNZFE3EAAAAogfkRAAAAoAQCNgAAAJRAwAYAAIASCNgAAABQAqOIf4wJE96t9i7wCXTs2CF69lwiJk+eFnV1xvKDalAOobqUQag+5bBt6N17qfl6nhps2vQfs5xjNO+B6lAOobqUQag+5bB9EbABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQgk5lbAQAAKA1Gjv2xZgy5Z1m235NTceorZ0eNTVdora2LppT9+49on//VZr1Nfh4AjYAANAuTZo0KTbddEjU1TVv8F1UampqYuTI0dGrV69q70q7JWADAADtUgbRESMea9Ya7DFjRsXQoYfEsGHDY8CA1aO5a7CF6+oSsAEAgHaruZtUZxPxNHDgGrH22us262tRfQY5AwAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJSgUxkbgZamtrY2Roy4L6ZOfTuWXHLp2GijzaKmpqbauwUAALRhAjZtzu23/zFOPfWEePnll+qXrbxyvzj11DNjl112req+AQAAbZcm4rS5cH3IIQfEmmuuFXfd9fd49913
i/t8nMtzPQAAQHMQsGlTzcKz5nr77XeIK664NjbaaONYcskli/t8nMtPPfXE4nkAAABlE7BpM0aMeKBoFv6d73w/OnZs/NXOx0cddXS8/PLY4nkAAABlE7BpM954Y3xxP2jQWnNcn83EGz4PAACgTAI2bUafPssX988++/Qc1z/zzNONngcAAFAmAZs2Y9NNP1OMFv7zn58fdXV1jdbl4wsvvCBWXrl/8TwAAICyCdi0GTnPdU7F9Ze/3BkHHbRfPPTQg8Uo4nmfj3P5qaeeYT5sAACgWZgHmzYl57kePvx3xWjiO+ywbf3yrLnO5ebBBgAAmouATZuTIXrHHXeO//zn3zF16tux5JJLx0YbbabmGgAAaFYCNm1ShukttvhsLLPMEvHWW9Ni1qzGfbIBAADKpg82AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABAWwrYM2bMiF122SUefPDBj6x79913Y8stt4ybbrqp0fLbb789tttuuxg8eHAcfvjhMXny5Pp1s2fPjvPOOy823XTT2HjjjeOcc86JujojSQMAANCGA/YHH3wQRx99dIwaNWqO688999x48803Gy178skn44QTTogjjjgirr/++pgyZUocd9xx9esvu+yyIoBffPHFceGFF8Ztt91WLAMAAIA2GbBHjx4de++9d7z88stzXP/www/HiBEjonfv3o2WX3XVVbHjjjvGbrvtFoMGDSpqqO+9994YN25csf7KK6+Mo446KjbccMOiFvsHP/hBXH311YvkPQEAAND+VD1gP/TQQ7HJJpsUtdBzajZ+0kknxcknnxydO3dutO6JJ54ownPFCiusECuuuGKx/I033ojXX389Ntpoo/r1G2ywQbz66qsfqQkHAACAMnSKKtt///3nuu6SSy6JtdZaK7bYYouPrMugvNxyyzVa1qtXrxg/fnxMmDCheNxw/bLLLlvc5/qmv/dxOnSY76fSgjmOUH3KIVSXMgjVpxy2fVUP2B/XdPy6666LP/7xj3NcP3369I/UaufjrPXOdZXHDdelXD+/OneuWci9pyWoqfmwgcZii9VEx47+mkE1KIdQXcogVF+nTh3r77Ms0ra1yICdI4CfeOKJRR/qSs1zU4svvvhHwnI+7tq1a6Mwnc+r/Jxy/fyaMaPWVaZWrK5udnE/c2Zt1NYaQR6qQTmE6lIGofpmzaqrv8+ySNvWIgP2a6+9Fo899lg899xz8ZOf/KRY9v7778cpp5wSd9xxR/zmN7+JPn36xMSJExv9Xj7OwdByXcqm4n379q3/OTUdLG1eZn/4f4lWznGE6lMOobqUQag+5bDta5EBOwPyX/7yl0bLDjjggOK26667Fo9z7utHHnkk9thjj+JxDmqWt1yev58DnuX6SsDOn3PZgvS/BgAAgFYdsDt16hT9+vX7yLIcxKxSO73ffvsVgXu99daLddZZJ84888z43Oc+FyuttFL9+vPOOy+WX3754vH5558fBx98cBXeDQAAAO1BiwzY82PIkCHxox/9KC688MJ45513YvPNN4/TTz+9fv0hhxwSkyZNiiOOOCJqampir732iq997WtV3WcAAADarg6zc0Qx5mjChHervQt8AjlS4zLLLBFvvTWtfnAJYNFSDqG6lEGovv/+98nYeust4p577ou111632rvDQurde6n5et6HY8YDAAAAn4iADQAAACUQsAEAAKA9D3IGwCczduyLMWXKO836GjU1HaO2dnrU1HSJ2trm6//ZvXuP6N9/lWbbPgDA/BCwAdqhnGVh002HRF1d2xj0KGeLGDlydDGdIwBAtQjYAO1QBtERIx5r9hrsMWNGxdChh8SwYcNjwIDVm7UGW7gGAKpNwAZopxZFk+psIp4GDlzD1CQAQJtnkDMAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAA
lEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoASdytgIAAALbuzYF2PKlHeabfs1NR2jtnZ61NR0idraumhO3bv3iP79V2nW1wBo6QRsAIAqmDRpUmy66ZCoq2ve4Luo1NTUxMiRo6NXr17V3hWAqhGwAQCqIIPoiBGPNWsN9pgxo2Lo0ENi2LDhMWDA6tHcNdjCNdDeCdgAAFXS3E2qs4l4GjhwjVh77XWb9bUAMMgZAAAAlELABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACXoVMZGYGGMHftiTJnyTrNtv6amY9TWTo+ami5RW1sXzal79x7Rv/8qzfoaAABAyyZgUxWTJk2KTTcdEnV1zRt8F5WampoYOXJ09OrVq9q7AgAAVImATVVkEB0x4rFmrcEeM2ZUDB16SAwbNjwGDFg9mrsGW7gGAID2TcCmapq7SXU2EU8DB64Ra6+9brO+FgAAgEHOAAAAoAQCNgAAAJRAwAYAAIASCNgAAADQlgL2jBkzYpdddokHH3ywftnjjz8e++67bwwZMiS+8IUvxI033tjodx544IHidwYPHhwHHnhgjBs3rtH6yy+/PLbccsvi948//vh4//33F9n7AQAAoH1pEQH7gw8+iKOPPjpGjRpVv2zChAnxzW9+MzbeeOO4+eab46ijjorTTz89/vGPfxTrX3vttTj88MNjjz32iN///vfRs2fP+Pa3vx2zZ88u1t91111x8cUXx49+9KO44oor4oknnohzzz23au8RAACAtq3qAXv06NGx9957x8svv9xo+d133x3LLrtsEbz79+8fO++8c+y2225x2223FeuzNvvTn/50HHzwwbH66qvH2WefHa+++mo89NBDxforr7wyDjrooNh6661j3XXXjdNOOy3+8Ic/qMUGAACgbQbsDMSbbLJJXH/99Y2WZ9PuDM1NTZ06tbjPGukNN9ywfnnXrl1j7bXXLpqV19bWxlNPPdVo/XrrrRczZ86MZ599tlnfDwAAAO1Tp2rvwP777z/H5X379i1uFZMmTYo//elPceSRR9Y3IV9uueUa/U6vXr1i/PjxMWXKlKLZecP1nTp1iqWXXrpYDwAAAG0uYM+P6dOnF8E6m4zvs88+xbJs6t25c+dGz8vHOVhaPr/yeE7rF0SHDp9492kBHEeoPuUQqksZhOpTDtu+Fh+wp02bVgxeNnbs2LjmmmuKpuBp8cUX/0hYzsfdu3cv1lUeN11f+f350blzTSnvgero1Klj/f1iizmWUA3KIVSXMgjVpxy2Ly06YGd/62984xvFAGg5EngOdlbRp0+fmDhxYqPn5+M111yzaAqeITsfDxgwoFg3a9asePvtt6N3797z/fozZtS6ytSKzZpVV38/c2ZttXcH2iXlEKpLGYTqUw7blxYbsOvq6uKII46IV155JX73u9/VB+WKnPv6kUceqX+cTcaffvrp4nc6duwY66yzTrE+B1BLOfhZ9sMeNGjQAu3H/2b9opVzHKH6lEOoLmUQqk85bPuqPor43OTc1g8++GCcccYZRbPvHNQsb1kLnfbcc8949NFH49JLLy3mzz7uuOOKQdEqgToHTxs+fHgx3deTTz4Zp556ajEd2II0EQcAAIBWX4N91113FbXYQ4cObbR84403Lmq0M0xfdNFFcdZZZ8UvfvGL
GDJkSHHf4X9tunPe7JwX++STTy76Xm+//fZxzDHHVOndAAAA0Na1qID93HPP1f+ctc/zstVWWxW3uTn00EOLGwAA0Lq88sq4mDx5UrR2Y8aMKu6ff/65qK39sD92a9azZ6/o23elau9Gi9WiAjYAAECG6y02Xz/ee/+DaCuGDj0k2oJuXReP++5/VMieCwEbAABoUbLmOsP1VedErLlqtHpvTYlYpnu0es+8EPHVYz8ojo+APWcCNgAA0CJluF5/7WrvBbSBUcQBAACgNRGwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoASdytgIbcsrr4yLyZMnRWs3Zsyo4v7555+L2tq6aO169uwVffuuVO3dAAAA5kLA5iPhevPNN4r3338v2oqhQw+JtqBr125x//3/EbIBAKCFErBpJGuuM1x/59yLo++qq0VrN23KO7FE9x7R2r3ywuj4+TFHFMdHwAYAgJZJwGaOMlyvuva61d4NAACAVsMgZwAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUwDzYAABz8Mor42Ly5EnRmo0ZM6q4f/7556K2ti5au549e0XfvitVezcA5krABgCYQ7jefIsN4/333o+2YOjQQ6It6Nqta9x/38NCNtBiCdgAAE1kzXWG668O+2r0GdgnWrP33nkvuvXoFq3dG8+/EVcNvao4NgI20FIJ2AAAc5HheqXBwhwA88cgZwAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJehUxkYAKNcrr4yLyZMnRWs3Zsyo4v7555+L2tq6aO169uwVffuuVO3dAABaKAEboAWG6y0+s2G8N/39aCuGDj0k2oJuXbrGfQ88LGQDAHMkYAO0MFlzneH60j32iIHLLhut3dvTp8fSXbpEa/f8xIlx6E03FcdHwAYA5kTABmihMlyvt+KK1d4NAACaM2DPmDEjHnnkkXj88cdj4sSJ0aFDh1huueVi8ODBsdFGG0XHjsZOAwAAoH1ZoIA9ZcqUuOKKK+Lqq68ufu7bt2/07NkzamtrY9KkSfHTn/40unfvHl/5ylfia1/7WvEzAAAAtAfzHbDvvvvuOP3004ta6tNOOy222mqr6NKkT93bb78dDz74YNx8882x0047xSmnnBKf//znm2O/AQAAoHUG7FtvvTV+97vfxcorrzzX5yy99NLxhS98obiNGTMmzj//fAEbAACAdmG+A/ZFF120QBseMGBA/PKXv1yYfQIAAIC2P8hZXV1do0HMsu91DnaWtdfrrbde1NTUlL2PAAAA0OLN93DfTzzxRNGvOvtXV/z5z38umoAffvjhxcBme+65Z7zxxhvNta8AAADQugP2e++9F9/61reiX79+RS11ZdlJJ51UhO6HH3447rjjjmLQs7POOqu59xkAAABaZ8D+29/+VjT9Pv7446Nr167x2muvxZ133hkffPBBHHDAAcWUXRmuDz300BgxYkS8/vrrMXXq1ObfewAAAGhNfbB///vfF3NdNxy0LGutV1hhhbj88svrl02bNi3eeeeduPDCC2O77baLbbfdtnn2GgAAAFpjwD7xxBOL/tUHH3xwrL766jFhwoTYdddd47vf/W7ss88+jUYaf+655+Lss89uzn0GAACA1hmwM1RnbfRee+0VgwYNihdeeCF69+4du+++e7H+sccei3POOacYTfzUU09t7n0GAACA1jtN109/+tO4
5ZZbYuTIkbH11lvHvvvuG507dy7WdejQIbp161Y0Dc9RxQEAAKC9WaB5sHfbbbfi1lSOLD58+PAy9wsAAADa5jzY2Tw8BzabXw888EDRbxsAAADag/muwT7llFOKabpyOq4dd9wxttpqq1h11VWL5uEVzz77bDFN1x/+8IeYOXNm/OQnP2mu/QYAAIDWGbDXWWeduPnmm+PWW2+Nyy67rBjULPtg9+jRI+rq6orpuXIqr9VWWy0OPPDAYgC0Tp0WqAU6AAAAtFoLlIAzMGez77y99NJLxajhEydOjI4dOxajig8ePDhWWmmlhdqRGTNmxB577BEnnXRSbLLJJsWycePGFY/zdVZcccWiBn2LLbZo1Az9rLPOKp6Xr33mmWc2ev2cozv7hk+dOrWodc9tde3adaH2rz3psnS3mDb77Zj83uvV3hX+J49HHhcAAKDlWugq5n79+hW3MnzwwQfx/e9/P0aNGlW/bPbs2XH44YfHwIEDiybnd999dxxxxBFxxx13FGH7tddeK9YfeeSRseWWW8YvfvGL+Pa3vx1//OMfi2brd911V1x88cVx7rnnRq9eveK4444rfj755JNL2ee2bNXt1or/zvpH/Pf5f1R7V2hyXAAAgJar6m24R48eXYTrDNQNZV/urJm+7rrriinABgwYEP/+97+LsJ2h+sYbb4xPf/rTcfDBBxfPP/vss2PzzTePhx56qKgBv/LKK+Oggw4qphRLp512WhxyyCFxzDHHqMWehxfufjr23vcH8alVV6/2rvA/r74wKm67+8qIb1V7TwAAgBYbsCuB+Hvf+14x3VfFE088EWuttVYRris22GCDorl4Zf2GG25Yvy5D89prr12sz+VPPfVUUeNdkdvOgddyILYhQ4YssvfXGk1/+71YosPS0bPbCtXeFf7n7Q4TiuMCAAC0XFUP2Pvvv/8cl0+YMCGWW265Rsuyqff48ePnuX7KlClFs/OG67P/+NJLL13/+/OrwSDp0CL4TkL1KYdQXcogVJ9y2EID9ty8//77xSjlDeXjHAxtXuunT59e/3huvz8/OneuifamU6f5nhqdKh2fxRZrf9/L9kY5bNmUw/ZBOWy5lMH2QRls2ZTDZgjYjz76aPTv3z969uwZt9xyS/z5z3+O9ddfPw499NBGc2MvrMUXXzzefvvtRssyHOc83JX1TcNyPu7evXuxrvK46foF6X89Y0Ztu7syM2tWXbV3gXkcn5kza6u9GzQz5bBlUw7bB+Ww5VIG2wdlsGVTDksO2DnwWA4a9tvf/jaWWWaZYoTuzTbbrJgWK/s5N+z7vLD69OlTDIDWUE4JVmn2nevzcdP1a665ZtEUPEN2Ps7B0dKsWbOKwJ7TiS2IJmOvQdX5TkL1KYftQ06POGNmbUyd8mHLOKorj0VlykplEKpPOSwxYF9xxRVx4oknFqH6ggsuiNVXX70I2//617/ilFNOKSVg57zWl156adHcu1Jr/cgjjxQDnVXW5+OKbDL+9NNPF6+d83Kvs846xfrKnNo5+Fn2wx40aNAn3jcAoH1Mjzh+8vQYP2JstXeF/zFlJdAmA/Yrr7wS22yzTfHz/fffH5/97GeLn7O2uGmt8sLaeOONY4UVVihqx3N+63vuuSeefPLJYjqutOeee8bw4cOLEJ5TceU82H379q0P1Dl4Ws55nfNoZ633qaeeGnvvvbcpugCA+Z62cttDt4g+A5ev9q4QEW88Pz5uu/tpU1a2I9liYfzsbvGCiVRajPGz87g4IKUH7Byt+8033yxqhJ955pn4wQ9+UCzPKbCWXXbZKENNTU388pe/jBNOOCH22GOP6NevXxGiV1xxxWJ9humLLroozjrrrGJ5Tr2V95X+3zvvvHO8+uqrRcjOvtfbb799MQc2AMD8yOkROy9WE0t2/7AlHdX11mI1pqxshy0WLq/dMGJUtfeEhlbd7uFq70LbC9gZXjNUZ23w8ssvX9Q233HHHXH66afHXnvttdA789xz
zzV6nKH6qquumuvzt9pqq+I2NzngWt4AAIDW14rk3H3GxqBVq70nVDz7QsSed7+nJUnZAfv73/9+EazHjRsXX/nKV4ra5kmTJsW+++4bRx555MJsEgAAoF62WFi+w3ux6odj29ECvN0hj0u196INBuwcROyAAw5otKzpYwAAAGhPFnoG93vvvTcOPPDA2GKLLYq+ztkf+tZbby137wAAAKAtB+wcOTynw8oBx6ZMmRJ1dXXFPNM54vctt9xS/l4CAABAWwzYWVud/bB//OMfF/2v0/e+973illNnAQAAQHvTcWFH+67Mg93QDjvsEC+//HIZ+wUAAABtP2AvtdRSxTzYTY0ePTp69OhRxn4BAABA2w/YX/ziF+Oss86KZ599Njp06BDTpk2Lf/7zn8U82DvttFP5ewkAAABtcZqu7373uzF+/PjYbbfdise77757zJ49Oz73uc8V/bAB+GS6LN0tJiwZ8dLiM6u9K/xPHo88LgAApQbsxRZbLM4///w46qij4plnnilGER84cGCsttpqC7M5AJpYdbu14oaNs5HRpGrvChWrdIxVX16r2nsBALS1gF3Rr1+/4gZAuV64++k4beX1Y2Dv3tXeFf7n+QkT4oC7n474VrX3BABo9QF70KBBRX/r+ZG12gAsvOlvvxe9p0b0675YtXeF/3lr6ofHBQDgEwfsHNRsfgM2AAAAtDfzHbD32GOP5t0TAAAAaI99sP/2t7/F888/H7W1tfXLZsyYEU899VRcdtllZe0fAAAAtN2Afd5558VvfvObWHbZZWPSpEnRp0+fmDhxYhG2d9555/L3EgAAAFq4nANmgd12221x/PHHx3333RfLLbdcXHPNNcXP66+/fqy00krl7yUAAAC0xYCdtdbbbLNN8fMaa6wRTz75ZCy99NLxve99L+64446y9xEAAADaZsDu3r17vPfeh1OVrLzyyjF69Oji5xVXXDHeeOONcvcQAAAA2mrA3mSTTYp+2BmmBw8eHHfeeWdMnjw57rrrrujZs2f5ewkAAABtcZCzY489Ng477LD485//HPvvv38xavjmm29erPvhD39Y9j4CAFTFG8+3/pZ5773zXnTr0S1au7ZwLIC2b6EC9gorrBC33HJLfPDBB9G5c+e4+uqri0HOcjTxddddt/y9BABYhHr27BVdu3WNq4ZeVe1doYE8JnlsANrcPNhp8cUXL5qGP/zww9GrVy/hGgBoE/r2XSnuv+/hmDx5UrRmY8aMiqFDD4lhw4bHgAGrR2uX4TqPDUCbCNi/+MUv4sorr4wbbrgh+vXrF48++mgceuihMXXq1GL9ZpttFr/61a+iS5cuzbW/AACLRAa51h7mamo+HG5n4MA1Yu21VYQAtJhBzq6//vq45JJLYu+99y5qq1POhZ1h+vbbb4977703pk2bFpdeemlz7i8AAAC07oB94403FgOYff/7348ll1wynnrqqRg7dmwccMABsdpqqxX9r3Pgsz/96U/Nu8cAAADQmgP2mDFj6kcKTyNGjIgOHTrEVlttVb8sg/Zrr71W/l4CAABAW5oHOwN1RQ5s1qNHjxg0aFD9smwi3rVr13L3EAAAANrSIGcDBw4sBjXLwc2mTJkSDz74YGy77baNnpPzYufzaP1eeWF0tAXTprwTS3TvEa1dWzkeAADQls13wP7KV74Sp5xySjzzzDPx2GOPxYwZM+Kggw4q1r3xxhtx2223xfDhw+PMM89szv1lUcz72bVb/PyYI6q9KzSRx8XcnwAA0AYC9q677lqE6muvvTY6duwYP/3pT+vnvR42bFgxddc3v/nN+NKXvtSc+8uimPfz/v+0+nk/k7k/AQCAFjsP9l577VXcmho6dGgceeSRscwyy5S5b1RJW5j3M5n7EwAAaLEBe25yii4AAABozxZoFHEAAABgzgRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwA
AAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEnQqYyMAlO/5iROjLXh7+vRYukuXaO3ayvEAAJqPgA3QwvTs2Su6dekah950U7V3hSbyuOTxAQCYEwEboIXp23eluO+Bh2Py5EnR2o0ZMyqGDj0khg0bHgMGrB6tXYbrPD4AAHMiYAO0QBni2kKQq6n5cKiPgQPXiLXXXrfauwMA0KwMcgYAAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACo4gDAAAt0jMvRJvw1pSIZbpHq9dWjkdzErABAIAWpWfPXtGt6+Lx1WM/qPau0EQelzw+zJmADQAAtCh9+64U993/aEyePClauzFjRsXQoYfEsGHDY8CA1aO1y3Cdx4c5E7ABAIAWJ0NcWwhyNTUfDns1cOAasfba61Z7d2hmBjkDAACA9hCwX3/99Rg6dGisv/76sc0228Tll19ev+7pp5+OL3/5yzF48ODYc889Y+TIkY1+9/bbb4/tttuuWH/44YfH5MmTq/AOAAAAaA9afMD+7ne/G926dYubbropjj/++PjZz34Wf/3rX+O9996LQw89NDbccMNi3ZAhQ4ognsvTk08+GSeccEIcccQRcf3118eUKVPiuOOOq/bbAQAAoI1q0QH7nXfeiccffzwOO+yw6N+/f1EbveWWW8a///3vuOOOO2LxxRePY489NgYMGFCE6SWWWCLuvPPO4nevuuqq2HHHHWO33XaLQYMGxTnnnBP33ntvjBs3rtpvCwAAgDaoRQfsLl26RNeuXYsa6pkzZ8YLL7wQjz76aKy55prxxBNPxAYbbBAdOnQonpv32Yw8A3nK9Vm7XbHCCivEiiuuWCwHAACAdhWws4b65JNPLpp4Zz/qrJH+7Gc/W/S7njBhQiy33HKNnt+rV68YP3588fObb775sesBAACgXU3TNWbMmNh6663j61//eowaNSpOP/302GyzzeL999+Pzp07N3puPp4xY0bx8/Tp0z92/fz6XwU5rZzjCNWnHEJ1KYNQfcph29eiA3b2tf79739f9J3O5uLrrLNOvPHGG/GrX/0qVlpppY+E5Xycz6vUfs9pfTY5n1+dO9eU9E6ohk6dOtbfL7aYYwnVoBxCdSmDUH3KYfvSogN2TrvVr1+/+tCc1lprrbjkkkuK/tUTJ05s9Px8XGkW3qdPnzmu792793y//owZta4ytWKzZtXV38+cWVvt3YF2STmE6lIGofqUw/alRQfsDMsvvfRSUfNcae6dA5317du36JP961//OmbPnl0McJb3OQDat771reJ5uf6RRx6JPfbYo34+7bzl8gUxe3YzvDEWOccRqk85hOpSBqH6lMO2r0UPcrbNNtvEYostFieeeGK8+OKL8fe//72ovT7ggANihx12KOa2PvPMM2P06NHFffbLzoHQ0n777Re33npr3HjjjfHss88W03l97nOfK5qWAwAAQLsK2EsttVRcfvnlxYjhe+21V5x99tnFnNj77LNPLLnkkjFs2LD6WuqcfuvSSy+Nbt26Fb87ZMiQ+NGPfhS/+MUvirDdo0eP4vcBAACg3TURT6uttlpcdtllc1y37rrrxs033zzX383gXWkiDgAAAO22BhsAAABaCwEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEncrYCAAAC27s2BdjypR3mm37Y8aMKu6ff/65qK2ti+bUvXuP6N9/lWZ9DYCWTsAGAKiCSZMmxaabDom6uuYNvmno0EOa/TVqampi5MjR0atXr2Z/
LYCWSsAGAKiCDKIjRjzWrDXYNTUdo7Z2etTUdFkkNdjCNdDeCdgAAFXS3E2qO3XqGMsss0S89da0mDWr+WvKAdo7g5wBAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAAChBpzI2Agtj7NgXY8qUd5pt+2PGjCrun3/+uaitrYvm1L17j+jff5VmfQ0AAKBlE7CpikmTJsWmmw6JurrmDb5p6NBDmv01ampqYuTI0dGrV69mfy0AAKBlErCpigyiI0Y81qw12DU1HaO2dnrU1HRZJDXYwjUAALRvAjZV09xNqjt16hjLLLNEvPXWtJg1q/lrygEAgPbNIGcAAABQAgEbAAAASiBgAwAAQAn0wQZop5p7qrxFOV2eqfIAgJZAwAZohxblVHmLYro8U+UBAC2BgA3QDi2KqfIW5XR5psoDAFoCARugnVoUTapNlwcAtCcGOQMAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgPQTsGTNmxGmnnRYbbbRRfOYzn4kLLrggZs+eXax7+umn48tf/nIMHjw49txzzxg5cmSj37399ttju+22K9YffvjhMXny5Cq9Cxa12trauO++f8a1115b3OdjAACAdh2wzzjjjHjggQdi+PDhcf7558cNN9wQ119/fbz33ntx6KGHxoYbbhg33XRTDBkyJIYOHVosT08++WSccMIJccQRRxTPnzJlShx33HHVfjssArff/sfYZJP1Ytddd4r999+/uM/HuRwAAKBdBuy33347/vCHP8Tpp58e6667bmy22WZx8MEHxxNPPBF33HFHLL744nHsscfGgAEDijC9xBJLxJ133ln87lVXXRU77rhj7LbbbjFo0KA455xz4t57741x48ZV+23RjDJEH3LIAbHmmmvFXXf9Pd59993iPh/nciEbAABolwH7kUceiSWXXDI23njj+mVZa3322WcXIXuDDTaIDh06FMvzfv3114/HH3+8eJzrs3a7YoUVVogVV1yxWE7blM3ATz31hNh++x3iiiuujY022rj4/uR9Ps7lp556oubiAABAs+gULVjWNn/qU5+KW265JS655JKYOXNm7LHHHnHYYYfFhAkTYrXVVmv0/F69esWoUaOKn998881YbrnlPrJ+/PjxC7QP/8vvtAIPPvhAvPzySzFs2PCoqWl87Sgff+c7R8dOO32+eN7mm29Ztf2E9srfU6guZRCqTzls+1p0wM7+1C+99FJcd911Ra11huqTTz45unbtGu+//3507ty50fPzcQ6KlqZPn/6x6+dH5841Jb0TFoWJE98s7tdZZ51YbLGa+pCdP3fs2KFYXnleLgOaX9NyCCxayiBUX6dOHevvnYO2fS06YHfq1CmmTp1aDG6WNdnptddeK0aG7tev30fCcj7u0qVL8XP2z57T+gzn82vGjFpXmVqRZZf9sMXCU089FRtuuHHU1X042vzMmbVRW1tXLK88L5cBza9pOQQWLWUQqm/WrLr6e+egbV+LDti9e/cugnIlXKdVVlklXn/99aJf9sSJExs9Px9XmoX36dNnjutzmwvifzOC0QpssslnYuWV+8XPfnZ+0ee6YTPxPKn4+c8viJVX7l88z3GFRU+5g+pSBqH6lMO2r0UPcpbzV3/wwQfx4osv1i97
4YUXisCd6x577LH6ObHz/tFHHy2WV343B0mryFCet8p62p6ampo49dQz4y9/uTMOOmi/eOihB4tRxPM+H+fyU089o3geAABAuwrYq666anzuc58r5q9+9tln41//+ldceumlsd9++8UOO+xQzG195plnxujRo4v77JedU3OlfM6tt94aN954Y/G7OZ1XbmullVaq9tuiGe2yy64xfPjv4plnno4ddtg2unfvXtw/88wzxfJcDwAA0Bw6zK5UAbdQWQOZ82D/9a9/LfpP77///nH44YcX03I9+eSTccopp8SYMWNijTXWiNNOOy3WWmut+t+96aab4sILL4x33nknNt9882I7yyyzzHy/9oQJ7zbTu6K55VRc//nPv2Pq1LdjySWXjo022kzNNVRBDuiyzDJLxFtvTavvgwYsOsogVN9///tkbL31FnHPPffF2muvW+3dYSH17r1U2wjY1SRgt25OKqD6lEOoLmUQqk/Abl8Bu0U3EQcAAIDWQsAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAlEDABgAAgBII2AAAAFACARsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEncrYCAAAQGs0duyLMWXKO822/TFjRhX3zz//XNTW1kVz6t69R/Tvv0qzvgYfT8AGAADapUmTJsWmmw6JurrmDb5p6NBDmv01ampqYuTI0dGrV69mfy3mTMAGAADapQyiI0Y81qw12DU1HaO2dnrU1HRZJDXYwnV1CdgAAEC71dxNqjt16hjLLLNEvPXWtJg1q/lryqkug5wBAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBGwAAAEogYAMAAEAJBGwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBB1mz549u4wNAQAAQHumBhsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAAACUQMAGAACAEgjYAAAAUAIBmxbphz/8YayxxhpzvT344INz/L2bbropttlmm4/dbt7mx9SpU+OWW25Z6PcArU2lfL322msfWXfttdcW6y666KJP/DqvvPJKsa28T+PGjYt77733E28X2oqP+z/3ST3zzDPx6KOPNsu2oa3Jc8qG559rr7127LDDDnH55ZcX6w844IBS/i/StgjYtEgnnHBC3HfffcXt+OOPj+WXX77+cd6GDBnS7PuQfzz/8Ic/NPvrQEuy2GKLxd///vePLL/77rujQ4cOpbzGCiusUJTjvE9Zxp988slStg18vMMPPzzGjh1b7d2AViP/R1XOP/N/4dChQ+Occ85RCcNcdZr7KqiepZZaqrhVfq6pqYnevXsv0n2YPXv2In09aAk23HDDImB/9atfbdSa47HHHou11lqrlNeoRnkGgIWR56EN/2ftvvvucfvtt8df/vKXqu4XLZcabFqdRx55JPbbb78YPHhwrLfeevHNb34z3nzzzUbPueCCC2L99dePLbfcMn73u9/NdVt//etfY6eddiq2tddee8VDDz1U39T84osvLh5nkyBoL7bddtvie5+huuIf//hHEbyXWGKJRs/NcrLjjjvGuuuuG3vssUf8
5z//adSs7uqrr46999471llnnfjSl74UI0eO/EgT8eyyka+X5S2b2qXx48fHd77zndh4441jk002iTPOOCNmzJhR/5r77rtvUQu3wQYbxB//+MdF9MlAy5AXfy+55JKijH3605+OLbbYoig/FVmO8nHl/+T+++8fY8aMqV/36quvxnHHHVffXSrXHXLIIfX/M/N36+rqinXZ9PV73/te8fzc1he+8IX429/+VqV3Di1Hp06dihZf6Y033ohvfOMbxf+6LCMPPPBA/fPeeeedOOmkk+Izn/lM8T/rmGOOKZal7AaS5fiaa64pyl6e0+b6yv+7jztPpWUTsGlV3n333aJpzuabb15cPRw+fHi8/PLLcemll9Y/J08ennvuubj++uvj6KOPjp/85Cdz7Mv27LPPxv/93//FYYcdVpyk77rrrkVYf+mll4o/ZgcffHDRFD2bBEF7MXDgwOjTp0/885//bPQPfrvttmv0vAy6p59+elEes5lcnjwceuihxYlGRZ6c57IsX1kDkEF5Tt1Bspxlecvn54nFQQcdFO+//35xcexnP/tZEfCzOV5F1qavttpqccMNNxThAtqTLG9XXHFFnHnmmXHnnXcWF5uy7Pz3v/+tf86wYcOKE/0sp1mesxxm2crnZZerbPKaZW/y5MlFAF9uueXixhtvjFNOOSWuuuqquPLKKxuV/wz1ua0999wzjjrqqBg9enSV3j1U18yZM4ua6/vvv7+4IF0pk3ne+Kc//am46HXsscfWt4I84ogjinEP8qLYZZddVlzQajgWUFYQ3XXXXfGb3/ymKJ+57UrT8487T6VlE7BpVaZPnx7f/va3ixOKlVZaqbgauP3228eoUaPqn7P44ovHj3/841h99dWLZjxf/OIX47rrrvvItjKcZ+1aru/Xr18ceOCB8dnPfrYYzKlLly7RrVu34uqkpqy0N3nSUOmHnSflDU8kKjL8Zm3YbrvtFquuumr84Ac/KMJ5npxXZPnLYL7KKqvE17/+9foa7IYyeGc5y/K29NJLx7/+9a8ipJ977rlFLfdmm20WJ598clEup02bVvxO9gXPE44BAwZEz549m/3zgJYkxy44++yzi7LRt2/foqY6/081/D+Y/8u+9rWvFWUkL4RlkM5ynGUsu2hUumHlhequXbsWz8nnZnnN1iN5sl/Ro0eP+NGPflSsz6CeF8SMT0J7khee8nuft2yxlaE3LwRn4E15MStbca288spFAJ4wYUJMmjSpCMhZ45z/z/L38pY/5//XF154oT6wn3jiicX/u6zFzttTTz01z/NUWjZ9sGlV8iQiT+hzALK8IphX0bO2Opu2VWTwXmaZZeofZ7/RvDLfVF5F/POf/1zUdFfkHzo1YrR3GaazlmrWrFnx73//uwjOvXr1+kj5yQtdDWXztkpT1NS/f//6n5dccsmifM1L/n7+Xp7UV2T5zn3J1iop9yUvgkF7tOmmm8YTTzwR559/flFe8n9hntBXmnWnhv8Ts+zlRa587tZbb91oW7ksR0XO5q4VGSJye1OmTCkeZ41c586d69fn44blHNq6/H+YlTmVSpw8F80LVQ3POxuWt/TBBx8UIbp79+5F+avIC1X5/y3XVcYayvDc8Pfz/11yntp6Cdi0KlmzlU3U8oQgm6Tmlb1sPponGxUdOzZumJEnHZV+Mg3V1tYWVxozsDfkxJ32LluGVMY7yBFTP//5z3/kOXmSMacy1fAkf07lbl7mtt2G93N6DrQXecH4rLPOii9/+cvFSX/WpmXNVkMNA3Ol7DT93zi3slQpw5XyNr/bgrYqL+o2DMFNNQzbFdlEvOGFqaZlqFK+UtPnVZqXO09tvfyFpFXJvmB55S/7l2XznBx4KefQbTjidz7O/psVOf1PNmFtKq8o5iBL+UezcsurhJW+p2VNSQStTZ5Qb7XVVkUztnvuuecj/a8r5afhha2UjxteqV8Y+fs5hdDbb79dv+zxxx8v9imb30F7l81Ds/VI
9qPOE+9ssZXNURv+H8ymqQ3HLsnWH3MasDPLW/bdbti6JMc4yK4X2Zw8ZSuxhhfOsquHwT9h3rJ8ZUuQSnPwlC0vcxDR+flfOa/zVFouAZtWJf/hv/baa0Wz1QzSObhZDgjRcMTFbJaTV/SzP1r2vc7BIzKMN5X90+64445iMJc8+chm53mrNGvNfmk5+ET+cYP22Ew8a8ryyn3D5m8Ny0/2t87BWF588cU477zzipP6HOV0QWX/6wzVGRJyAMN8vRwkJk/sR4wYUfQP3WWXXYqmdtBe5MXhPJFueMuLxxmo839glrsMuznKdwbkhv8Hb7vttqJsZhPTHMxsxRVXLEbkr5S3POHPi1jZtzN/L8c5yOdmi5UcaCn7dVcuMuf/2uw3mr/zq1/9qgjkC1POob3J5uDZZzrPSbM85y1/3mijjYquV/Myr/NUWi5NxGlVckqgnAoo+8PkP/+cEiH/WFVGH05rrrlmMWpqNh/PE5FsSpd9xprK/qI5MnH+bt5n7Vj2acs/fCmbxWZA33nnnYuavKZ9UKEtyz5e2Q9sTrXXKUdMnThxYlx44YVFf80sd7/97W+LE4oFlU1dszYupzm5+eab45e//GURqrMM59RgGQJyRgBoT/KiVVN5QTnLSt5y6rv8v5T/F/OCcPbFrqgM7pmDM2VLr1//+tf1Tb0zPOe286JWTsmVA5rliORZG54113lBOmcHqMjpgXKQtFyfJ/Z5YXtOF92Aj8qZbHIGjQzL2ZQ8L17ntHfzY17nqbRcHWY3bFMEAECrlaP75xzyRx555CfeVp7Y5yjIOWsAAPNHE3EAAAAogYANAAAAJdBEHAAAAEqgBhsAAABKIGADAABACQRsAAAAKIGADQAAACUQsAEAAKAEAjYAtGLbbLNNrLHGGvW3QYMGxfrrrx9f/epX4z//+U+pr/Xggw8Wr/HKK6+Usr2ZM2fG5ZdfXsq2AKAlELABoJU7+OCD47777itu//znP+O6666LJZdcMr7xjW/Ea6+9Fi3V7bffHmeffXa1dwMASiNgA0Ar161bt+jdu3dxW2655WLgwIFx2mmnxfTp0+Ovf/1rtFSzZ8+u9i4AQKkEbABogzp16lTcd+7cuWhG/pOf/CR22mmn2GSTTeKhhx6K2traonn2F77whVhnnXWK+2uvvbbRNh5++OH48pe/HOuuu27suuuu8eyzzzZaf8ABB8QPf/jDj1320ksvxWGHHRYbbLBB8dpHH310TJo0KW666aY47rjjiudks/Nsfg4Ard2H/30BgDbjjTfeiLPOOquo2d5qq63i17/+dVx11VUxbNiwWGqppYpA++Mf/zhuvfXWOOmkk4qAnU3LzzzzzPjggw/ia1/7WowbN65oer7bbrsVzx09enScfPLJC7QfU6ZMia985SvF611xxRXRsWPHYhvf/e53i3169913i/3Mpu09evRots8DABYVARsAWrkMzr/97W+Ln2fNmhUzZsyIAQMGxM9+9rNYccUVi+UZtD/zmc8UP0+dOrWorc6a5i9+8YvFsv79+xeDl1166aVx0EEHxQ033BDLLrtsnHLKKVFTU1Ns7/XXX1+gPtN33HFHTJs2LS644IL6AH3GGWfEn/70pyJsZ9hP2bQdANoCARsAWrl99923aJqdMrguvfTS9eG1ol+/fvU/v/DCC8UI3tlsu6GNN964qGnOJtzPP/98rLXWWkW4rsjRyRdEbiODe8Pa6RzlPG8A0BYJ2ADQymWAbRig56RLly7zHFysrq6uvv92hw4d6h837df9cbIGfUGeDwBtiUHOAKCdyebeiy22WDzyyCMfGdQsm2tnYM9a5pEjRxbNzSvycUO5jWxuXpGBPPtuV6y22moxduzYoq91xX//+9/YbLPNYvz48UWIB4C2RMAGgHYm58jeZ5994sILLyzmos6Rvq+++uq45pprioHNMvjut99+8f7778fxxx8fY8aMiXvuuScuuuiiRttZb7314v777y8GSMtt
nH766cXAZhXZvzvD+jHHHFOMQJ4BPft05zRiyy+/fDEIW8rlOaUYALR2AjYAtEM5RdaBBx4Y5513Xuy8887FoGc5wncG7NSnT5+iP3bWNO++++7FSOI53VZD+dxtt902vvOd78Tee+9dBObcVkXXrl1j+PDhRbPx7Cf+jW98o6jVzsHX0qabbhqDBw8u1mWAB4DWrsPsuXXEAgAAAOabGmwAAAAogYANAAAAJRCwAQAAoAQCNgAAAJRAwAYAAIASCNgAAABQAgEbAAAASiBgAwAAQAkEbAAAACiBgA0AAAAlELABAACgBAI2AAAAxCf3/wBqz9YKJFaDxQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Side-by-side box plots\n", + "sales_by_product = [df_sales[df_sales['Product'] == product]['Sales'] for product in df_sales['Product'].unique()]\n", + "\n", + "plt.figure(figsize=(10, 6))\n", + "box_plot = plt.boxplot(sales_by_product, patch_artist=True)\n", + "plt.xticks(range(1, len(sales_by_product) + 1), df_sales['Product'].unique())  # label boxes via xticks: boxplot(labels=...) is deprecated since Matplotlib 3.9\n", + "# Customize colors\n", + "colors = ['lightblue', 'lightcoral', 'lightgreen', 'gold']\n", + "for patch, color in zip(box_plot['boxes'], colors):\n", + " patch.set_facecolor(color)\n", + "\n", + "plt.title('Sales Distribution Comparison by Product')\n", + "plt.xlabel('Product')\n", + "plt.ylabel('Sales ($)')\n", + "plt.grid(True, alpha=0.3, axis='y')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Scatter Plots\n", + "\n", + "Explore relationships between numerical variables." + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Scatter plot colored by category:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create additional numerical column for scatter plot\n", + "df_sales['Days_Since_Start'] = (df_sales['Date'] - df_sales['Date'].min()).dt.days\n", + "df_sales['Commission'] = df_sales['Sales'] * 0.1 # Assume 10% commission\n", + "\n", + "# Basic scatter plot\n", + "df_sales.plot(kind='scatter', x='Sales', y='Commission', alpha=0.6, figsize=(10, 6))\n", + "plt.title('Sales vs Commission')\n", + "plt.xlabel('Sales ($)')\n", + "plt.ylabel('Commission ($)')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Colored scatter plot\n", + "print(\"\\nScatter plot colored by category:\")\n", + "products = df_sales['Product'].unique()\n", + "colors = ['red', 'blue', 'green', 'orange']\n", + "\n", + "plt.figure(figsize=(10, 6))\n", + "for product, color in zip(products, colors):\n", + " product_data = df_sales[df_sales['Product'] == product]\n", + " plt.scatter(product_data['Days_Since_Start'], product_data['Sales'], \n", + " c=color, label=product, alpha=0.6)\n", + "\n", + "plt.title('Sales Over Time by Product')\n", + "plt.xlabel('Days Since Start')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABJ8AAAL6CAYAAACYZPeiAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3QeYW+WV8PEzM5KmaHpvHpdxt7GNbVwwxRhYmukkGwghWbLJfrvpm2wCaSxJKJuQkLK7JNmEhGTZJATSSIAEMNWAjXHv3dN71xS1+Z7zjiXPjMd4ijRq/9/zCOveEdIr6Uq699xzzhvX39/fLwAAAAAAAEAQxAfjTgEAAAAAAABF8AkAAAAAAABBQ/AJAAAAAAAAQUPwCQAAAAAAAEFD8AkAAAAAAABBQ/AJAAAAAAAAQUPwCQAAAAAAAEFD8AkAAAAAAABBQ/AJAAAAAAAAQUPwCQAQcV588UX5p3/6J1m9erUsXLhQLrjgAvnnf/5ns36ydHZ2yv/+7/+etv7111+XnTt3Tto41q1bJ3PmzDGX2traM97O7XbLypUrze0+8IEPBPx5j6Sqqso83r/8y79IKHR3d8vdd99tnveiRYvMNnMm+proWL///e+f8TYvvPCCuc0PfvADiXS63Sxfvjyoj/G73/3OvF4///nPz3ibP//5z+Y2H/vYx856fw8++KC5rd5vINx1113m/vbt2zfm/3es27be9vrrrx/HKAEAiA6WUA8AAICx+PrXv26CHyUlJXLppZdKVlaW1NfXyyuvvCIbNmyQ9773veY2wXbFFVdIXl6e3H777f51//d//yf33nuv/Nd//ZeEwvPPPy933HHHiH976623pK2tLSjP+0zS09Pl4x//uMyYMUNC4ZFHHjGBCg1Qnn/++TJ9+vSz/j8//vGP5ZprrpHy8nKJZrqdOJ3OUA9DLrvsMklNTZVXX31Vurq6zPWReL1e+ctf/iIpKSlmGwzUY+v3SG5ubkDuDwAAnBnBJwBAxNi0aZMJPOnB53e+8x2xWCxDMnL0gPqJJ56Qiy++2BxYBlNzc7MJwgxfFwrJyckSHx//rsGnv/71r+bAXbOBAv283y349IlPfEJCZe/eveZf3VamTp06qv/H5XLJl7/8ZRNIjIuLk2j1oQ99SMJBUlKSXHXVVfLb3/7WZJbdcMMNZ/zsNzQ0yI033ih2uz0gj63fEcH+ngAAAAMouwMARIyXX37Z/Pv+979/SOBJpaWlyWc/+1lzXYMwsURfi7Vr18o777wjLS0tp/3d4/GYA3sttYolvswezY4brfnz58vWrVvlV7/6VRBHhsF8AadnnnnmjLf505/+ZP7V4BMAAIg8BJ8AABFDs1LUwYMHR/y79rD57ne/e1pWhwZffvazn8l1110nS5YsMZlR//Zv/yaVlZVDbqeBm//4j/8wmRiLFy82Fy3B+uEPf2h6JvkyMLR/i9q/f7+/B5D2DPrP//xPs1771/hu4/Pss8/K+973Pjn33HNl6dKl8sEPftCUwg3mu2/NuvnXf/1X06dI+1lpUOls/u7v/s48z5H6Xm3evNk8tzOVK03keSsNaunzf+qpp0x5mz5H7c8zvC/Oli1bZO7cuSZQ5nA4hgSJrr32Wpk3b565zdlokEJfS30v9bH0upZkDX8d9Xmr8847zyzreM7m3//938Vms8m3v/1tU845kb5Bw/v8+Hog6bi0vE9fN32PNfjy2muvmds8+eST/vdBX5PnnnvutPvV8rSHHnrIZO1oSeGFF14o99xzz2mZd76xaQ+yq6++Ws455xzzWvX394/Y8ymQn5OxWLZsmUyZMkXeeOONEUtDdfvQgLKWyK1YsWJMY/C95vr5+/CHP2xeg0suucQ8p5HeO/2Oeeyxx0z5ro5LX1+9/Ve/+tURA7vqb3/7m3mv9L71M/ajH/3I/131bvR90CCnBtR0O9Dt9P/9v//nz9gb3ktOvzO0z53eVh9PHyccSicBABgNgk8AgIi
xZs0a868edGpfp23btpkD5uElPBrEGNwrRhtNazBEb3vLLbeYg24NYNx2223+AIOW7ekB5y9+8QuZOXOmKV9bv369NDY2ysMPP2yCEUoPgLWPkdJeMXpdD4j1ANJ3YKwH+r7bqO9973vy6U9/2l82pJfDhw/LP/zDP8gf//jH056n9ozatWuX6aukmTgLFiw462tz0UUXmec/UtaXltxlZ2ebg9vhJvq8fQ4dOiRf+9rXTEDkyiuvNMGL4fR11+ekjdEHN+3W6xpQvPPOO8/aBFvf+8985jMmkKTj1ICDXtdg3be+9a0hY9V/1Uc+8hGzrGWAZ6N9obR5vQZ49PkEw/333y+PPvqoCWro+PW562N+4xvfkPvuu88EJ2+66SbzvPS5Dg5G6Pt16623yv/8z/9IaWmpeb80AKflpu95z3vMNjac3ndZWZkJPGnz9ZHKCQP9ORkLHY8G4DRgM9L2+9JLL5nH1dvobcczBn1tNXikQVINEmmwaySaPanvj2YT6mP8/d//vQlG/uY3vzHb0XDbt2+XT33qU+b+9H3R8WmZ5+c///mzPu8vfOELJtipz1vfG/3caPBVr7/55pv+2+k6DUodPXrUfLfoZyghIcE8jv7/AABEhH4AACLIPffc0z979mz/ZenSpf0f+chH+n/2s5/119bWnnb73/72t+Z2n/zkJ/v7+vr8659++mmz/utf/7pZ/tGPfmSWn3jiiSH/f01NTf/ChQv716xZM2S93va6664bsu773/++Wf/888/71+3YsaN/zpw5/bfffnt/d3e3f31LS0v/5Zdf3r948eL+5uZms+6tt94y/7+ua2hoGNXrcckll/QvW7bMXP/Yxz5mxtrZ2en/u8fjMWP/yle+0t/e3m7uX8fiE4jnrWPQ9b/4xS+GrK+srDTr//mf/9m/zuFw9F966aX98+fP79+3b595febNm9d/7bXXDnl/RvL222+b+7vhhhv8r5nS6+vXrzd/27x5s3+9Pk9dp8/7bAbf1ul0+u/vr3/9q/82+r7qOn2ffb7whS+YdXv37j3tPoe/Vk899ZR/m62urvav//a3v23W6+ugr4nP7373O7P+m9/8pn/dv//7v5t1//u//zvksV544QX/dj58bB//+MffdbsJ1ufE93z1s3k2FRUV5nPyoQ996LS/6Xatf9PbjHcMF1100ZDP30jv3bZt28zyZz/72SG3c7lc/u3h6NGjQ7ZtvTz22GP+2/b09PTfcccdZv3rr79+xm3hmWeeMev+9V//1dz/4NdhxYoV/RdeeKH/ffjEJz5hbut7/kq30euvv95sM4M/7wAAhCsynwAAEUXP9Gu5iZYaWa1Wk6GiM9098MADZvY7zXrQLA4fXznWF7/4RZPB4KMZJ5pNoFkmSsvbdKa64Q2Pi4qKTFbDmUpuzkbLqLS8RjMhtDG4j/Yh0kyKnp4eUxI0mI5ptE29h5feaRmOvh4+WrKnGSGaETaSQD5vffyz0abnmt2j2TWavabvizZL/+Y3vznk/RmJllApfS01k8tHr/v6fWnp30TpdqVj03Hpv5ppE0j6OhUXF/uXfdugllRpWaKPllep6upq86+Wk/3hD3+QWbNmmb5ng+m2r/ejmUP6mRj+eGcT6s+J/r9a5qYlk4PLBzs6Osz2rFlYvmyl8YxBMwMHf/5GUlhYaDK/NJNpMM2C0rGp4aWNmlE2+L3Q7EPNVlNPP/30u34vqC996UtD+tfp+DXzSTPNtAxR+b7PNBty8Daq2W/6ep1phkAAAMIJs90BACKO9gzy9Q3SkhQtUdmwYYOcOHHC9NLRgzXtVePrT6QH+gUFBUPuQ8tjfAeJSsvb9KL3uWPHDnNfx48fNwd8en1wed9Y7Nmzx98Xxtcw3aeurs78O7xfkJZTjYeWcelBqT6WBg0Gl9xpidzgPkuBft76uMNf4zPR0i8tUdLeVupzn/vckKDLmeh7qQEhXyBgMN86vU0gaNmglpvp7IpazhfIEjw
NWAzmC4oMf98TExPNv76+PseOHTOzFep7Mrhs0aevr8/87cCBA0Neo9FsT6H+nCgNJunnWftc+QI6uv3q8x8caBrPGEbzGmjwSUtiNcinn1t9vSsqKsznc3ggyEd7TWkJ3GBaJqvb6btti3r/+v4+/vjjp/1NH1fp4+r3nJZT6oQB+j5oCa8G3jWYtmrVqrMGbAEACBcEnwAAEUunXNemyHrR/imaTfCVr3zFBAy0x48e1GvmhPYoOhs9cNceKtrbRbORlB6Ia58kzVLS7KHx8GXNaFDsTNrb20cMOoyVzvinDb9fffVVc8DuC0RpH6bhB8iBft6a8TEWmo3jCz5pz6LR0IwefW1GOuDW567vt+85BIIe7OtBv/ZT0ibcgXKmDJyzBRJ0W1ba+8fX3H4029No3ptQf06UZudpbybtM+ULPmn2kL5e2g9pImMY7Wfq17/+tem55uudpX3CNMBUXl5uAl2axTjYSK+Zfu708TRQ+G7fCxrkGs37qN9v2t/qpz/9qQmC/fKXvzSXzMxM8z2nfawAAAh3BJ8AABFBAw/ahFkbQmvZ3XCaoaEZApo1oTNDaVaR3lbLvEbK+FF6cKh/V1puo8EQna1KD3x1Fiw9uPMdFI/3oFrvXwM/euCqB6XBpkEdLVPS10APxLV8Z/CB+3DBet7vRoMHmknkC4p8+ctfNo3XzxYg0GCjBhs0UDK8ebjeZ29vr3nOgaLlTDqLnDbs1qDmJz7xidNu42vePTwjJpBBsMHPX+kMelqmGEih/pz4Xm8NlGoJoK/B+dtvv21mdhtcWhasMWj5q77fen/6r2YwaSmf0mX9DJ8pIDg8sKTvf0ZGxhkfS19PfT+HZ0OeiWYu6kXfC80O0//v97//vQnWaSadBqgAAAhn9HwCAEQEPfjUgzo989/U1PSut9WSF1/PpNmzZ0tNTc2IB6RayqMHsOrPf/6z5OTkmLIWLQvzHcxqQEP/fzU862G4kWYR0wNZLQMaXlrnmynroYceMgeTgaK9f7SHjPb+0awnfR76fM4kEM97rPSxNHvnYx/7mJlhTcuMdN3Z+ErztI/VcLpOx6mznwXSunXrTPBOx6s9dobzBRSHB5u0XCvQNJiq2VFasjXSe/Lzn/9c/vu//1taW1vHfN+T+Tl5N/pY+v9rxpluvxrU01K4wYI1Br1fpX3jNAjmCzwpff9Hut/BfZh8dBZO9W6zVOr3ggbIR3q9NbCks/b5yvYee+wx+e53v+sPWmnJ3Ve/+lUTEDvT5wEAgHBD8AkAEDE0y0HLyT75yU+OOKX8iy++aIJTl19+uT9TQsul9IBRgzyDe8FoloP2h9Emz0qzbjR7ZnAmg95em2PrQa3SKdEHBx0GLytf42Bfjx7lO3DW6dsHN4LW69o8XQMaE+mTM5xm/mj5kR7AagBKX4vBDY2HC8TzHgvNHtEgiQY7/uEf/kH+8R//0ZQ06bqdO3e+6/+rmW9KS64GN5XW675MIM0KCjTNzNJMq7179572txkzZph/X3rpJf86DZj88Ic/DPg49L26+uqr5fDhw/Kzn/1syN+08bS+Btpw/d0ybs4kWJ+TsdKyUQ0caw833X61D5X2NhosWGPwZd4ND25rk/fNmzeb61oqN9jBgweHTBign2sNFGkg2re9jkS/F/T11ob2g78v9HtNg0papuvLdNMsRt2eNFg9mK8R/eDm9QAAhCvK7gAAEUNn3dKDPW1CrOVlOuvVtGnTzAGhBjW2bt1qggEa1PG55ZZbTAaFHkBqI2bNlNCSHl2nTYh9zZS1tOfRRx+Vm2++2WQ96H3qQZ9m5WjDbg1wtLW1SX5+vrm9/qvZEHqgqCUvmiHja9b8yCOPmEwn7ceiB87ak0V7tGgTcL2tZq9oZkdtba2Z2erdMpPGQ18bbcKuYx78WowkEM97tPQg++677zbBGS2782UN6cxl+hrp37SU6Ey9jzSopgErDbxosEQbrPsCP5pBorMH6m0CTYMhOsOeBqG
GW79+vcnA0X48lZWVZpvauHGjydILRlBAe5tpZs1//Md/mGCrzojn2541yKhBTs38G6tgfU7GSktU9b3VHke6nXz0ox89LaMwWGPQx9WSP/3c6mdVA9ia2aSBJ8200pnu9H4H05I3bZivn2cN/Oq2WFVVZcbtm61wJBqY0gCbfpfp661NxPV5aCBLH0Nnb/TN7qflnhpcvOOOO0wWnn7PaABSH0sDt4HsRwYAQLCQ+QQAiBh6cP3973/fNOnVgzU9MNSD1N/+9rcmE0IP2DR4oQeggw9mNRj06U9/2mRF6OxSb731ljmA1b4xviwRPbjWgzw9cNf1ejBZUlJiggoa9FLaS8lHy170oFwzTTQIoDQrRXvOaBBC78OXmaBBC81K0TKeP/3pT2aM2qhYAwW+0plA0mwnfR5ajjQ8a2S4QDzv0dIZ2o4cOSLvfe97hzQZ14CRBhL0gHqkWdwGu+uuu8zsczpGbUatB+tajqb/nwYBgkWDM9pzZzh9H3Ub1MwgbfSu26IGBPS1HN6XKhB029YG6HfeeacJDmlQU8s2NQio68cbyAzW52Q8NCtIM5c0m2l4yV0wx6Azy2m5mwaUdNvSz6l+r+g2/5Of/GTE+9X/R/su7d692zQr1+bouqzfRe9GA2r6XfalL33J/D+63ei2rGWj2vBcg1c+GsTSSRTWrFlj3hMNvmrASoNR+j75+nEBABDO4voD3cghwjQ2DsxCBAAID3l5aQG5H77fASC88P0OALH73U7mEwAAAAAAAIKGnk8AAAAAxkWLKKoaHeL2eKU41y6J1oRQDwlADGhs65GObqfkZiRLhn3kXpEILwSfAAAAAIwr8PTStmqpbBiYyTM12SpXr5oqyYkcYgAInn3HW2Tz/oFZjxPi4+TSZaVSlDMwQyjCF2V3AAAAAMassb3XH3hSXT0uOVQ1dEZAAAi0rYea/Nc93n7Zcbg5pOPB6BB8AgAAADBmI81b5I3pqYwATMb3zvDvnhifQy1iEHwCAAAAMGZ5mclSkJXiX062WWRmSUZIxwQgusXFxcnCGTlDlhdMzw7pmDA6FGQDAAAAGLP4uDi5/LxSOVbbKW63V8oKUiUlyRrqYQGIcktm5kpeRpJ0OJySl5Vsmo4j/BF8AgAAADAuCfHxZDsBmHQlealSkhfqUWAsKLsDAAAAAABA0JD5BESJOx/cIOHg0bvWhXoIAAAAAIAwQuYTAAAAAAAAgobgEwAAAAAAAIKGsjsAAAAgxrR19cmeYy3m+rypWZKdnhTqIQFAVDlW2yGVDV1iT7LIovIcsVoSJJYRfAIAAABiiKPXJc++VSFOt8csn6jrlPVrpkl6ii3UQwOAqHCkul1e31XrX25s65UrV5ZJLKPsDgAAAIghdc3d/sCTcnm8UtvkCOmYACCanKjvHLJc39otPX1uiWUEnwAAAIAYkmQ7vfQjyUZBBAAESvKw79SE+DixWmI7/BLbzx4AAACIMSV5qTJnSqZ/ubw4Q8oKUkM6JgCIJktm5UpWaqI/8LTmnCKxJMR2+IVTHAAAAECMWbWg0DTA7RcRe5I11MMBgKiSnGgxvfQcPS5JtCaIzRrbzcZVyENvXV1dcu2110pVVdWQ9Y8//rh84AMf8C/X19eb5auuukruuOMOaW5uNutdLpd88YtflKuvvlquueYa2bFjx6Q/BwAAACDSpCRZCTwBQJDEx8VJWoqNwFM4BJ+2bdsmt956qxw7dmzI+sOHD8uPfvSjIevuvfdeuemmm+TZZ5+V6667Tu677z5/kKq/v1+eeeYZ+d73vief//znxe2O7UZeAAAAAAAA4SKkwacnnnhC7rnnHsnPz/evczqd8tWvflU+9alP+ddpdtOmTZtk/fr1ZvmGG26Ql19+2ax/6aWX5MYbbzTrZ86cKQUFBSaoBQAAAAAAgBjv+fTAAw+ctu7b3/623HzzzVJaWupf19bWJna7XazWgbRgi8Uiqamp0tLSYsrxNODko4Gsurq6UY8
hPj7OXAAEhiXGZ3EAAAAAAIRxw/GNGzdKbW2t3H333SbTycfr9Y54+/j4eFNyN9L60crOtktcHMEnIFCysuyhHgIAAAAAIIyEVfDpz3/+sxw6dEiuv/566e7ulqamJvnkJz9psqG0Mbn2ctKsJ/3X4XBIZmamyXpqbGyUqVOnmvvQ64Mzoc6mpcVB5hMQQK2tjlAPARGOACYAAAAQXcIq+DS4DE8zn/7zP/9Tvv/975vlFStWyNNPP236O+m/uqxleGvXrpWnnnpKli9fLkeOHJGKigpZtGjRqB/T6+03FwCB4XaPnKkIAADG70RdpzS09UiG3SazSjPI3AcQtjxerxyoaJPuXreU5NmlKIcTiwiz4NO70cbkWo73k5/8RDIyMuShhx4y62+//XYzE94111xjfoTvv/9+sdlsoR4uAAAAEBD7jrfI5v0N/uW2rj5ZMW/0mf4AMJle3V4jFQ1d5vre4y2y9twSKStIC/WwEGJx/SM1TYohjY2doR4CEBB3PrhBwsGjd60L9RAQ4fLyArNzwvc7gGjxp43HpLWzz79sTYiX2y6fLZGG73cg+vU5PfLrDYeGrJuSnyrrlp6aUAzRZbTf7UxLBQAAAIQx67CZZIcvA0C4SEiIO60sWAPmAFsBAAAAEMaWz8kXmyXBXE+Ij5OV8ym5AxCeLAnxsmJuvvjCTylJFlkyKzfEo0I4iJieTwAAAEAsystMlhsvmi7tDqekJVslJcka6iEBwBnNnZplGo1397klOy1RrCeD54htBJ8AAACAMJdks5gLAESCtBSbuQA+lN0BAAAAAAAgaAg+AQAAAAAAIGgIPgEAAAAAACBoCD4BAAAAAAAgaAg+AQAAAAAAIGgIPgEAAAAAACBomK8VAAAACJDDVe1S1dgpyYlWWVSeI8mJ7G4DY3GstkMq6vUzZJHF5bmSaEsI9ZCAiOfxesXrFbFaQpd/xK8hAAAAMEEtHb3y2HP7paK+SxKtCVKab5f61m5Zv3qaxMfHhXp4QFipbnLI4ao2cyC8qDxXUpOtZv3xug55dUeN/3YNrT1y9eqpEh/HZwgYr73HW2TLgUbp7++XGcXpcsE5RRIXgs8UZXcAAADABPQ63fL48weloqFLXG6vdPW6pLrRIU3tPdLZ7Qz18ICwokHZF9+pkuN1nXKoql2e21QhLrfH/E2Dt4M1d/RKd687RCMFIl9bV59s2d9gAk/qaE2HHK5uD8lYyHwCJujOBzeEeggAAGCS6Y58fWuPeL394nR7zQGy/6xuv5h1WuJgs1IyBAxW1dDlPxBWjl6XtHT0SUF2ymllqpo1aJvEMiENJDe190pKokWy05Mm7XGBYHH0uPQnadi60AR0CT4BAAAAY6AHzi9tq5bKhoEsDT1gTk5MkESbRdxel3g8/ZJojZcLFxXR8wkYZvhnQot/kk6u0z5pDa3dJgCkgafzFxROWgC3w+E0WVg9zoED83Nn5ZnxAJEsJyNJkmwJ0uv0+D9vRbkpIRkLv4YAAADAGNQ0d/sDT6qnzy0zijNMDw09gNWMib9fVy7Z6ckhHScQjuaUZUpNk8P0fdID4aVz8iTDbjN/035pV6+aKo5etwngWi2Tlzm440iTP/Ckth9qlLllmWQvIqIl2SxyxYoy2X64yZwYmT0lUwqyCD4BAAAAYcfb3y+1zd0m46koJ0U8Hu9pt5lenC6XL58iHm+/pCSxiw2cSUJ8vFy2fIp097okISHeBJwG0yCurwH5ZNLP7mD9I6wDfBNMdHa7JDczSexJk7+tjlVmaqKsXVIiocYvIwAAAPAugacXtlRJbbPDLOdmJMmlS0tNpka7Y6CZuJY0TCtMY0p4YAxSwuygfc6UTKms7zKfeTW1MI2yWZxm34lWeXtfvQlOWhPi5fLzpkheJlmuo8GnCQAAABhGG4gfrGqTxtYeqazvFMvJpsfai6aysUuuWjVVDle1m8wInbo6Es5+A+Ph9njlQEWb9Lk8MrUgzfSQiUZFOXa
5amWZVDU5TMPxmaUZoR4Swoxmv75zoMHfwNvl8ZpyNs16xdkRfAIAAACGzXj1l7eOmwBUZ7dTGlp7ZWpBqj8ApTPcaanQgunZoR4qMCmZf/Wt3WZ5z/EWuXJFWdRmeuRmJpsLMBINOg2vxNQ+ShidyZu3EgAAAIgA2ghZA09KM5oS4nU6+IFl7edUVpAW4hECk6O9y+kPPPkCr0dq2kM6JiBU4uPiTBN6H22YP29qVkjHFEnIfAIAAAAG0T4ePjrd+5SCVJlakC6FOSkyoyidPjCIGZaEuNPXxZO/gNi1Yl6B5GclS4fDJYXZyZIfopnjIhG/nAAAAMAgpfmpUpqXKlWNXQPLeWlyybklJhAFxJK0FJssnJ4ju481m+X0FBvlpoh50wrTQz2EiETwCQAAABhWWrFuaYk0tvdqh1nTA0bXAbFo2Zw8mV6UJk63V3LSk8R6svcZAIwFwScAAABgmLi4OMmn8TBgZKdH5wx3ACYPYWsAAAAAAAAEDcEnAAAARLX+fqbCBgCcjt+HyUPZHQAAAKJST59bXtleIw2t3ZKaYpO1S4opHwIADPt9sMraJSX8PgQZmU8AAACISm/vb5D61m7R89qd3U5zoAEAwJYhvw8ueZnfh6Aj+AQAAICo1OFwDlnu7HFRYgEAkI7uob8PXd1Ofh+CjLI7AAAARLyGth45VNkmCQlxsnB6jqQmW6UwO0WaO3r9tynISjaz2AHj0efyyO6jzdLT55GyglQpK0gL9ZAAjFNBVoo0tZ/6fcjPSuH3IdqDT11dXXLrrbfKI488IqWlpfL000/Lj3/8Y/O3srIyuf/++yUjI0Pq6+vlc5/7nDQ1NUleXp48/PDDkpOTIy6XS+655x7Zvn272Vj09osXLw710wIAAMAk6HN65I3dtfL2gQaxJ1lN0KmyoUuuWzNdls7OM7fR0or0FJucNy8/1MNFhPL298sLWyr9B6tHatpNj5iphYELQLk9Xtl1pFm6elxSnGuX8pKMgN03gKH090FjTXUt/D7ERPBp27Zt8tWvflWOHTtmljXA9K1vfUv+8Ic/SHZ2tgkw/eAHP5Avf/nLcu+998pNN90kN954ozz55JNy3333yXe+8x15/PHHTXrcM888I4cPH5aPfexj8pe//EUslpDH1QAAABDEYMCRqnZ5ZUeNtHT0SrvDKe1dTinJtZu/N7b1SGleqiyfywEFJs7R4xqSJaFO1HcGNPj08rZqqW5ymOtHaztMMGpOWZaEM5fbK0dr2sXr7ZephemSksQxGCJDfHycLJvD70PM9Hx64oknTNZSfv7Am66ZS1/72tdM4EnNmzdPamtrTXbTpk2bZP369Wb9DTfcIC+//LJZ/9JLL5mAlJo5c6YUFBSYoBYAAACiU0V9h/zvXw/IM2+dkGO1HSYo4PF4/X2dVJItIcSjRDSxWRMkflhJTiC3MQ00+QJPPifqOsd1X/Ut3aY8sKqhS4JJx/y3tyvkrb31snl/g/z5zePS3Tvw+cP4OF0eE1gHolFIQ9MPPPDAkGUNQvkCUT09Pab87gMf+IC0tbWJ3W4Xq9Vq/qZZTampqdLS0mKypTTgNPg+6urqxhTx1AuAwLBYmMcAABAcelD21MtH5J2DjdLV7ZJEW7w5eWmzJPgbxVriB3o+5WYkh3q4iCKJ1gRZOb/ABFp0W8tOS5TF5bkBu/+E+DixJsSL62QQVSUljv1Q7XB1u7yxq9bM4KXOnZUriwI4zuFBrsHZYDp1vWZs6ecPY+8ntuGdKtO7Tr/PLlpS7M/iBKJFWOZFtra2mvK5+fPnm6wmDTCNJD4+fsSO9Lp+tLKz7TQWAwIoK4sfSgBAYHm8Xtl2sEkOVbXJjsNNmi9v1vc5vZKcmGD6dmSlJUl5SbqsW1oimalJoR4yotDsKZmm0XifyytpydaAnsDW45ELFhXJaztqxe31mh40eZnJ8vL2ahP40kDXaEra9h5v8QeeBpZbgxZ8Gun5D88OixV6TNrr9Jh
suPEcW2472GgCT8rp9sgr26vlfZfOitnXE9Ep7IJP1dXV8uEPf1guvfRS02BcaRmeNiZ3u90m60n/dTgckpmZabKeGhsbZerUqea2en1wJtTZtLQ4yHwCAqi1dWjKODBWBDABDNbW1Se//OsB0xTW6fZKb59brJZ4sVnjxenyitWSIOdMz5GLzy2WguwUDtYQVEk2iyTZgnPfOnveey5JMUGM1q5eeXlbzZAso/XnTxNLwrufZB++/Qfz86CfN83O8ZULZthtMdkkXXvOvbi1Srp73WbCg8uWlUpGauKY7sPR6z6tl5bL5ZVEyocRRcIq+NTX12cCT+973/vkQx/6kH+9ltutWLHCzISnmVD6ry7r+rVr18pTTz0ly5cvlyNHjkhFRYUsWrRo1I+pzfH0AiAw3O5T6eIAAEzUm7vrpPlkaY+eL+w/+VuTZreZIMAVK6aYcigNCgDR0FtKL3uOtQxZrw31OxxOyU5/96w+LbN7aVu1eLz9JgNn6ZyBGR+DQQNb65aVSk2TQzyefjNDnwaGY83G3XUm8KR0psI399TLlSvLxnQf+tpVNZ7q0ZWTnkTgCVEnrH6l//jHP5rg0e9//3tzUVp6p72htDH53XffLT/5yU8kIyNDHnroIfP322+/3cyEd80115gv2Pvvv19stiCdjgAAAMCk6up1ScLJXjia9ZGaZDH9nGYUp5uZigI52xgQLpKH9XvS45zRBFhL8lLlujXTpbWzT9LtNslKG1sGzngCUDqrZCwb3mTdMY6m6/OmZmntnlQ1Okx55dLZwQsaAqES1z9S06QY0tg4vlkkAJ87H9wQ6iGElUfvWhfqISDC5eUF5kCS73cgsuguaX1rjyk3KcxONuV06s09dbLrSLMp7dHZtXIzkuSOK+ZKTgZ9nSIN3++jp5+DF9+pkvrWbhPg0ew+7TkV6zSwo5mQaSnBD6yN1us7a+VITbt/eU5ZpqyaXxjSMQHh+N0eVplPAAAAiD01TV3yyB/2SLujz2R3aOnQTReVm7KTFfPyJcmaINPae0w/leVz88RmYRcW0U3L17SkVGeQ0+u+YOxkzSr59r4GOVHXaT6DqxcUSH5WioSaNuR+4e1KkwWpnaxWzCuQuZoxFGKrFxaYbCXt/aRZmYvKme0vWia66OnzmEktEsYwoVkk6+pxmWCq/hbrhAdrzikyEx4ECr/cAAAACJmdR5rl58/uk65ul8TFx4nH65Lth5tkwfRsM0uX7vSfSwkKYpCW2qUkWSf9cfedaJX9Fa3meo/TLRu2VsvNF5eHvJ/TjkNNJvCktHRny4EGk2UU6pnL9TuKMrnootl1mnmo27+WwGoT+bP1W4sGr+2o8c+6WNnQJZv21stFi4sDdv8EnwAAADDpjlS3S3Vjl+w62iJOl8es69eJYOJE+pwe08AYwOTTDJ7B+lweU+6WOcYZ3AJNm6gPpou6hvktEQiaZXigss2UgB+t6TCBJ996Lf++ZvU0iXYtHX3v+l0wUQSfAAAAMGk6u53yqxcPSWV9l2glg8vdbxqJuzz9JvikB5Oa7RGLU7ZjYnS2MD1Tr/2SdPtZPicv5FkxkSg3PckcfPsk2RLEHoIMrJGacje0dpvvCDW3LNP0wwImSgOsz7x1wpSdKS05Lcm1i+Vktp+W38WCnIwk02du8HIgEXwCAADApHl1R40JPGlfGY+7X/pcXklPsYnN6pHuPrcUZqfIh6+eZ2bqAkZLsxNe2VYjbu9AWdbe4y2SmWqTWaU06R4r7aOkn8XjdZ0m8KTNzkNdcqd0ZssrV5ZJXUuPpKVYZXpReqiHhChR19ztDzyppMQE6exx+ZvalxXExoyOFy0uljd2a88np+n5tGp+QUDvn+ATAAAAJk1bp1Pi4+PE6+k3WSn2JIsU5dhNj6dZpRnmOjCejDpf4MmntXNoCQlGRz+Xy+bkm0u40cbn4dD8HNElIWFoBl1+ZrIUZdtNw33N/tHfp1iQkmSRy5ZPCdr9E3wCAAD
ApMnLTJK2rmSpbXFIf79Iht0mN188gwNKTEiGPVFslgRxuk+Vx+iZewA4Gy2xm1qQJifqO81yYbZdLjuvNGZmuZssBJ8AAAAwaS5cXGxKeHIzkyQt2SoXLCqmxA4TphkKly4vlS37G0wD+5mlGZRlARh1tt/FS4rNLHdaEp6bmUw/sSAg+AQAAICA2X+iRbYeapJ+r8iCGdmyZGbukL/rtNWXLC0N2fgQvbRU5upVUyWSvHOgQfadaDUZFufNzTdBMwChCUBp0CkQdMY8Jjs4HcEnAAAATJhOyfzspgrZd7zFBJiKclJkx+EmM3NWaX5sNGsFxuJ4XYfsPtZirnu8HnljT53JCMxMHWhyDCCy1DY75PWdtdLj9Jgm5RcuKqJ0bxBeCQAAAEzYS9uqpbWz10yDrjNlNbb3mvXt3c5QDw0IS+1dztOyJTq7T824BSByuNxeeWlrtfn908/yibpO2XVkILiMAQSfAAAAMC5eb7//X52mOtFqEV+lgfbd0as0fQZGlp+VbD4jPpaEeMlOJ+sp2mlPIUSfHqdbXJ6hM252cPJlCMruAAAAMCYNbT3yyvZq6el1m1nqLllaYoJMjW09UpKbKs0dvebA+qIlxaYPD4DTFeXYZc2iItl/sufTubNzxZ5kDfWwECQapH99V60cr+0QiyVe1iwskqmFaaEeFgLEnmSR1GSrORHjU5DF799gBJ8AAAAwKq2dfVLX7JCNu+vMjHWqvrVbth5olEvOLZHN++pN2ZA2Tl46O0/i42m4Cryb8uIMc0H023uiRY7VdvhLtF7dWSO3ZJWbHnmIfBpAvnz5FHl7f4P09LlNYHFOWdakPX5/f78cr+uUXqdHSvLskp4SfrPIsqUDAABgVI1UX3inStxurzmAykpL9JfUdXY7zQHUxUtKQj1MAAhLw/t5aSZUd6+b4FMUSbfb5NJloZnN9dUdNSb4pKwH4+XKlWWSnZ4k4YSeTwAAADirvcdbzcGSZjMlWhNMFpSvd0lhTkqohwcAYa0we+j3ZEqixQQrgInSE0C+wJPS3lMHKtok3BBmBQAAwGma23tl34lW00B8wfRsfyNxpSn9TW29kp+RLCX5qXLOjJxQDhUAwt70onQzEcPR2g6xWRJk+Zw8f/lytGTHHq5qN/2s9DdB+x9hcsQN/oH2r5OwQ/AJAAAAfm6vVxpauuWlbTXiPjlzT2VDl6xeWCi1zd1mnc7KdcXKMlkyMzfUwwUQRrTvjNPtFZsl/rQDYt/fNHMyVmkPoMnsAzSZk1A8v6XKvMeqpskh154/TWwx/F5PptRkq8wuzZSDVQPZTkm2BJk/LVvCDcEnAAAAmJK6X284KG/tqRe3p98cNCycnm3OzPe5PCbodN2aaeYgQ3d0C7IotUNs63N6zMyOKUkWyUxNlFjX4XCavnBaAqSvyWXLppjecKqprUc2bK0209Hra3XZ8tKwmNlPy4ff2lMn3X1uKStIk2Vz8iQ+HFNGwlxVQ5c/8KR0xjf9bOiMjpgcqxcWmibn+nutJZ7h2Ess/EYEAACASbVxd608ueGQtHe7RQ+7EhLiTGPxozUdMqcs09wm2WaRtBSbuQCxTgMtz22uMLNa6Wdm6Zw8WTj9VPlpXUu3NLR2S4Y90RwQBlN1k0Pe2d9gshY1q2ZBiDIe3txTZwJPShtpv76r1mS/KJ3ZTQNPqq2rTzbvrZdLloamMbOPx+uVF7ZUmsCT2nu8RZITNeg+sTLi+pZu2V/RavrjLZqRIxkxEJhMtp2e4ZRkI9Qw2YpzwzvYxxYBAAAQo5o7euRXzx+SHUeaxeMdOGut/9XMJ0tCnDmDquaWZYX9Ti0wmXYcbjKBJ99nZuuBRpkzJctkCh6paZeNO2vNeqX9b5bOzgvKODTD5KWtVf7P75b9DZKWbDVZPJPN0Tt0NjdHz6llR+/Aa+XTNWw5FHr6PP7Ak09Te++E7rOlo1f+tqXSZJL6ys+uWzN90rJQdHvQ2UgT4uNkVmmGWC2
TU/Y2uyxTqpoc5vlqMHbJrDx/1lsoMhK3HW6Unl63TMlPk5mlGSEZB0437k/B1q1bZdq0aZKdnS1/+MMf5Nlnn5WlS5fKRz/60REbXgEAACA86Cx1jW098pM/75XqRof/wNVH9+S0r9P5Cwvk6lXTwjJ9Hwgl97DPjC75Ag77T7T6A09KG/cHK/jU3tV32udXJwsIRfBJA9SDZ9gqGRSwLs6xS1Vj14h/CxXti6OXXudAkF1lTzBgUtXo8G8HSu9bv2sn4/3QwNOf3zjuP2lwpLpdLj9vini9YjK6gnmMnhAfL5ctKzVBRj1xEaqsJy39e35LpSn5UxUNXeYHbWYJAahwMK6t4te//rXce++98uijj0pWVpbcfffdsnr1avn5z38uLpdLPv7xjwd+pAAAAJiw43Ud8psXD0tTe485WNHjEb0MatchVmucrFlYKDdfPDOUQwXC1uzSDNOI39fnZmpBmiSeLD3ScqvBgtlDSEu69PEGBzyy05MkFFbMLZBES4I0dfRKVmqiLJl1akKCixYXy7ZDjdLucEpBVrIsDIMZMjXAfunSUnljd50pCSzLT5twyZ32ujpt3SQF77VM2hd4Urp9/vKvB0z/Ps1Cunz5lKCeSNDg1mhmuNOyxLf21puZ/2YUZ8jS2bkBC4xpJpsv8DS4HxXBp/Awrq3vscceky9/+csm4PSd73xHZs2aZQJRr732mtxzzz0EnwAAAMKQy+2VJzYclo5up8l+8nj6TX8nPQjThuJaqqHTgX/6PYvJdgLeRUleqly5Yorpt6TBhVlTBnqjqXNn5cmL71SZz5QeVJ83Nz9o49CD/bVLSuTt/Q3i8WjPp8yg95g6Ew2CnXuGDC8tR1wxr0DCTW5mslx3wfSA3V95cbopPdPSNw2nLCrPNY8xGfS73EeDojo7aUme3d9YfevBRllzTpGEkpbE6WfDdXIm1d3HmiUtxSqzB31+JkJnUtTfscHZgCMFBBEa43onqqqqZN26deb6xo0b5aKLLjLXy8vLpampKbAjBAAAQMDOCvtKTBKtFum1esxOupaapNttcvvls2VKCMp1gEiUn5ViLsPpTFPXXzDd9P/Rz1WwZ8Kbkp9qLgg9DTZqlpcGHDUYp8GQyTKrJEOOVLVLa1efyWS1J1nMRBE+2gQ+1Dp7nP7Ak8/wTKWJ0BMpFy4qltd21pjfttyMJFk881QGHiIw+JSTkyMNDQ1isVhk37598rnPfc6s379/v+Tm8uYCAACEA51CvKK+Uwpz7OaASA9GtPxCZ+LSKgczE1dBqlyxskzK8lMlPj4+1EMGAqa6scts6xoA0rKbyexLqxlJoylBQnQKReaoltddvXqqKWtLSIiXbQcbpaGtx//30hAGKLXvlZYB6k+MNSF+SAAqJ8Blopr5V5I3y2T6ak8v+lGHj3F9Kq655hoTcEpOTpbCwkJZsWKFPPPMM/L1r39dbrnllsCPEgAAAGPy7FsnzFTnSmez02nfb1k7U9536Sz508Zj0tbllNK8VLnpoumTNiMSMFkOV7fLxpPbv28Ws9ULCkM6JiDYNPNHS0LVpctKZfvhJjProDaDn1OWFZIxaQD4b29X+vuj6WyMqRarCQ7NKEoPWMnd8NdBL4iC4NNnP/tZE3SqrKyU97///ZKQkCDNzc3yvve9Tz7xiU8EfpQAAAAYkx1HhrZC2HOsRW5ZK5KXmSwfvmZ+yMYFTIZDladmXVOHq9pl1fwCsiAQMzQTKhz6bB2qavMHnlRnj0tuumiGpKXYQjouREjwSVOyP/CBDwxZN3wZAAAAk8Pr9cqTLx+RxrZe03D4suVTTIPfwSzDloFoNjzrQZsxE3iKPo5el5npT0scB7+/mlVT3dQlcRInpfl2SQjDkmJtSL//RKv0OD1SVpAqBSP0D4sGw3+L9F0iK+n07fhAhQbpRGZPyYjawNy4i1FfeeUV+elPfypHjx6V3/zmN/K73/1OysrK5Prrrw/sCAEAAHBGJ+o65Pt
P7jRTmOuU7nuOt0hje69cuaJMnnjpiDnA0fU6zTYQK5bOzpPmLb1m6nkNSqycH/oMEATWpr31sr+i1VwvK0iTi5cUm+86l9sjz7xVIW1dfeZvGtT5u/OmmAbg4UIzgV7YUiX1rd1med+JVrl8eakU5QzMThdNzpmRI9WNDunqcZnlJbNyx90Tq0N/5+LjoqqfWk+fW55584SZEEQdrm6T9edPE3vSyM9Re3rpa5mXlSzpERakGte7rjPcffzjHze9n7Zv327Otrndbrn77rvNB+mGG24I/EgBAAAwxN/erpDnNulBlvPkmn5xu72m0eytl86ST2bbzdn/guyUqD2rDowkJyPJzDinAQg9UI3WTIJYpQfgvsCT0okVjtV0SHlJhhyt6fAHnsxtW7uluskRVjMCavDAF3hSegx9pLojKoNPGkS5bs00ae7oM7P/6aQXY+Xt75dXt9fIifpOszy7NFNWL4yOHm41zQ5/4EnpjLRVDV0j9ujacbjJ9PGSk9ljGrAcacbNcDWufLcf/OAHpu/Tgw8+aPo9qc985jPmotlQY9HV1SXXXnutVFVVmeXNmzeb7KkrrrhC7rnnHhPUUvX19aa076qrrpI77rjD9JhSLpdLvvjFL8rVV19tgmE7duwYz1MCAACIGHqgoplOb+yuE++pVhr+674T/HoAvqg8l8ATYpJmV+jBPIGn6KMH6GdaN6i9kN/gnkPhQAMHcWcpTwuGUL0OOqlFYXbKuAJP6nhtpz/wpA5WtUlNk0OigW2ECT9GmgTE4/WayUN8NKt519EWiSTj2sIPHDgg69atO239lVdeKRUVFaO+n23btsmtt94qx44dM8tOp1O+8IUvyHe/+1157rnnpLe3V5588knzt3vvvVduuukmefbZZ+W6666T++67z6x//PHHzYdIZ9v73ve+J5///Of9ASsAAIBo0tntlKc3HpNf/u2gPLfphOl1kmiNF8ugchItSVi/elpIxwkAwZSflSzJNsuQYE5p3kDW0LSitCFlWdnpSWa2t3ALjJ47O88fgNIAqZanBYv+Vry2o0b+9/mD8sSGw1LV2CWRpM95+vG9ltRGg9I8uykb9SnJtZtteETDgoeaERb1ZXdpaWnS0NBgejwNdvjwYcnIyBj1/TzxxBMmu0kDRmrnzp1SWloq06dPN8u33HKLybK6+eabZdOmTSa4pLSs7/777zdZTy+99JJ87GMfM+tnzpwpBQUFJqh13nnnjeepAQAAhJ3qxi55fWetVDZ0moOq5CSrdPe6zZlyj7dfMtMTzXJxToq855KZMqs08FNXA0CwdPe6TOlRZmriqJpRa/DmqlVlpsddv1ebNGdKRupAVk2SzSLXrJ4qJ+o6TTB+WmF6WDa41mDT1II06XV5JCs1MaiZT3uPt8jR2g5zvcfplle218gta8tNGVw4O1jZJs3tvWLTkywJ8SbbRyXZBjKpRnvSpra522wz4VR66aM96dYuKZaWjj7pl37JSU8acXIEbZo/f1q22eaVbtsLp2dL1AeftExOgz960RfG4XDIq6++Kl//+tdN+dtoPfDAA0OWtbROg0c++fn5Zl1bW5vY7XaxWgci2BaLRVJTU6WlpWXE/6eurm7UY9A3LZyazwGRjtmUACDwBw0//tMecXv6xen2mB1o3QHVnW894NIdVe1vMm9qtiyIsB1RANAAw1t76001S1qKVa5YUXbGZsuDabbQqvkj9/3RANRIPXPCTbrdJumT8Dhapj2YBnH0hEU4B5+0d+HOo6fKzDRQl2hLEI3L6G/gaJqWa+Dqr5srxHUyaKW/masXhF+vqLi4OFMmfzbL5+aboJv2DCuYQBljRAWfPv3pT5sAj6+x+I033mi+LNauXWv6Po2XNi4f6Y0Yab2Kj48fsW5V149WdradaVeBAMrKCq+0ZgCIVPuOt8hf366Qw1Xt4vL0i1V7hMTFmb4mrZ29ppeNlpJcuKg41EMFECO0zGfrgUbTJDkt2WpmEUw
ZRaDoTFxur5m1zndM19ntku2HmmTNOUUBHDW079/h6nb/ckqSxQT6Qk3LAd852Ci1uj2l2GTlvHz/9uTL1PJpaO2R966bOeaTN77Aky/QuWTm+GfbCwelYZi9NVrjetU1A+nb3/62fPKTn5R9+/aZ4NDs2bNN2dtEFBYWSmNjo39Zr+u67Oxs05hcezlp1pP+q9lWmZmZJutJbzd16lT//zM4E+psWlocZD4BAdTaGh3N/xA6BDAR6/QgrLGtR3694bA5O607zm53vznbq2UHurOeYbeZ2X70LCiA0fH1R4nnxPO47TjU5C/7ae3sMxkY164ZaJkyHvodN7xvjdM9cuIBxm9maYbpkXSstsNkD503Nz8sShG3HWoyASLf9uToccn68wd6FmpWlm5fPlp6h8g2oZCfBnx8QZ9AWLx4sRw/flyOHDki5eXl8tRTT5lsKg12rVixQp5++mmTZaX/6rKu17/r7ZYvX27+P214vmjRolE/pu7A6QVAYOgU3wCA8dl/olXePtAgDS3dpk+FNtTVS5fHZfZXtLK5LD9VPnLtgrA4cAAixZ5jLbL1UKPp1zt/ahaB23Gqb+0ZstzS2WcCSOP9PtIMFM3g9M1cpmHBGUWTUYgWe7QsO9xKsxvauocsN3f0mlndtL/RivkFsuGdKhM0035Y4ymX0+db2dDlz36aU5YZ0VlPkW7Ur/zcuXNHXZ6m2VDjYbPZ5Fvf+pZ89rOfNTPdaRDptttuM3/TxuR33323/OQnPzFNzR966CGz/vbbbzcz4V1zzTVmfNqHSu8HAAAgUjS190hbp1M27a3TngNm59jp0h1wrylBiE+IMzPiLJyeI5cuKzU75gBGp6mtR7YcaPAva+aOzpY2eIYpjE5Gqk3qW7uHlG9NNBB+ybklsvd4q3T3uaQ0L9VcEBkcvS5TKqnZuOMJ6uj/p+V0Pjqhhu/3LT8zWW66aIbJfrInW8fVn0pnOrzugummrE/Hx7YVWnH9IzVNGsHvfve7UQefNDspUjQ2doZ6CIhwdz64IdRDCCuP3rUu1ENAhMvLC8zBAN/viATaKPyV7dWmKWqP6eXUJyV5dlMWpDvL+nebxSILpmXJ3186K9TDBSLy+11LjV7dUTNknWY+LZgWXlkgkxUs6HA4zaxy4wkWuNweeXVHren5lJpklYsWF4+qUTKij84m+OrOGpOVa7MkyOXLSyU3M3lM96FZTa/tqJHalm7TQ0y3Jw0YjYdm4JnJxCirDdvv9lF/49x0000TGQ8AAABO0nN/z22qMNNd61ldnbluRnG69Dk9/gPD4hy7/MPV80yjWADjl5eZPGSadj2hXpA1toPkaAsWaBnTZctKJX+M3y9WS4LJvox0GkTTDBt6/46fNor3ta/RmVi3HGiUK1eWjek+NJvpsuVTJjQO7RmmAazjdZ3m/Vw1v0BmlWZO6D4RHOMueHzxxRfl4MGD4vF4/OucTqfs2rVLfvaznwVqfAAAAFFFp4/esK1KjlZ3mINgDUTpDHZ1zd1Smm+XlESLCTjpNOEEnoCJ01IezcrYcaRZ+r39Zpr23IzYCz5t3ncqWKCzzL1zoFGuWhW4/r2RQJ//KztqpKI+tgMV+jocqWk3mUdT8lIlIzVxzPcxeBY5s+w+FReYTAdOtJrAk+95vbmn3swGq597REHwSfstae+l3NxcaW5uNrPLNTU1mUCU9l4CAADA6bbsb5Cn3zguPb1us+OecPKse0JcnLi8/ZKekijrz59qppwGEDia4XP58tgO5p4WLBi2HAv2VbSawFMsByr0hMdL26qlqrHLLO843CxXrSwbc7nbrNIM2XeiddByaIJ4Xb3u056flpfG0nsaKcbVHU5nm/viF78or7/+uuTn58v//d//metLly6VKVMmljYHAAAQjbQ0YO+JFlP6k5AQZ866axKClr9o2d25s3LkmtUEngAEx+xhwYFYzPhx9LhOC1R09w0NXkS7jm6XP/Ck9DfpQGXbmO/nvLn
5cv7CQtM7TZvGz52aJaFQnJNiZkn00ezhrHFkciFMM58022nduoGmwnPmzJGdO3fKlVdeKZ/5zGfkS1/6knzqU58K9DgBAAAiih7UPPnyEdl7vEXsSVa5alWZaYSqO8Z6sJORYhOn22sajK87t0SWzmHqd2Ay6FTuu460SEtnr8n2WDQjJ+J6/zS09Uhtk8N8t5SXpI9qYihtsp6VlmgmMtA+WNE021+v0y0bd9VJS8fAe6pBkZEaqhfn2odk6+hsfVmpsRXwH2lT92XhjoVuc+EQwCzJS5ULFxfL4ep2sSbEy7mz88Q2jpnxEKbBp/T0dOnuHphis6ysTA4fPmyuFxcXS319fWBHCAAAEGEOVrTKT/6yzxwI6U69Njv+9YbDcvXKMtNrpbG9R7wekTXnFMoFi4pGPaMwgInbvLdBDlYNZHpUNnSZwMWq+YUSKTRrZcPWahPgVvWt3bLmnKJR/b/lJRkSjTTw5Mvm6W7skjd2143YGL00L9XMqHakut0EKJbMzDVN1MM9a3b30RZpbOuRDLtNlszKNb8p46XZtXPLsmR/xUAQTk+IaB+0d3v83j63JNksYRuknV6Ubi4Yu8NV7bLjSJO5vnhmrswM4nfEuIJPK1euNH2fvv71r8vixYvlRz/6kdx2223y17/+VbKzY2/KUgAAAJ9XtlfLU68cEUevW/TYsN/TL3Fx/dLpcEpqsk2uv2C6dHa7zPTk9KQAJl/loJIjs9zQJavmS8Q4UNHmDzwpzfhYMa/AlPDGKg30D9bcPnQ5kgMV2w42ye5jzeZ6VaP2OHLJ2iUlE7rPlfMLZFpRmplhVSe2SLSNHIBr7+qTF96pMrOyJtsssm5pieRmxl6z/mjV2NYjb+yuFd+3yRu7aiXTbgvaezyub6jPf/7z0tDQIM8++6xcccUVYrPZZM2aNfLNb35TPvjBDwZ+lAAAAGHM5fHIwcpWM5vU1oONon18B58f9ng1ABVnSl20JGRqYRqBJyBENNNjMC1dCyZtbF3d2GUyc7Tkb6KGl0jpd0t87MadjOHNsrPTo6fnj247Q5YbHNLT5zaXidCgk5ZeninwpN7aW28CT6rH6ZbXd9VO6DERXlo7+/yBJ6XXWzr7wivzqaioSP7whz9IX1+fdHV1yR133CHV1dVy0UUXyTnnnBP4UQIAAISpDkef/OhPe00fFS2p0wNDPTa0WOLF5Ro40NQG41eumCKFObE92xYQDrQf0IvvVJnea9rzZ/WCwqAGnv72dqUpjVO5GUlyxYqyCZVNaWlMXUu39Lk8Jsi9bE6eJMR49EnfUy21a9aeT2mJoy5DjATJSRZp7ToVENBeZU+8dNjfRH71wuBtv93DZpLr6fME7bEw+fSzot8hvgBUXJADt2MKPv3Xf/2X/OIXv5AnnnhCpk6dKnv27JGPfvSj4nA4TOrnli1b5JFHHpGkpLFN0wgAABBpNNC0cVetvLajRprae80BggaetCQiJz1RWrucpvlpeXG6/MPVcyUjlf0jIFyyZG5eWy69fR5JSkwwEwEEi5b0+QJPSr8rjtd1TqivijYN1/JdDbRoFtfwrJ9YpM3FR+rxFA1WzS+Q57dUSWe3U9xu75DySu1dVpqfKlPyUwP2eCfqOuXt/Q0mS294ULM03x6wx0HoaXmdBmp3HBko61xcniO5GcmhDz795je/kR/+8IfyoQ99SHJycsy6L37xiybQ9Otf/1rS0tLkE5/4hPz4xz+WT37yk0EbMAAAQCjptNTVjQ7Tg6OxrVf63B5TVtfd6zKNXPVgcMF03YFLknlTs6Qwh511INyYmSeTzn4opM3Ia5u7TXPqktyxf5a1WfNp67ynrxtPsEWbZyP66e/KjRdOl16nxwQz39xTN+TvEy2/G6zD4ZRXd9T4t9v+frdMyUszZZ2ZqYmyaOZAHADRo7wkY9ImIhh18Om3v/2t3HXXXfL+97/fLO/atUuOHz8un/nMZ2TmzJlm3T//8z/Lgw8+SPAJk+L
OBzeEeggI4/fl0bvWhXoIAKLQ8doO+d1rR83MP32ugTPQ2WlJ0tHlFG3lopng2tdJsxKY6hmIbI5elzzz1gl/6dGM4nS5cFHxmO6jNM8u6Sk26eh2mmXt9VZWMHlBIy3P23V0IKvhnBk5UphN6W8k0r5eGnAszrWbjFqXNhbUg/mE+ICWc2vwaXDAVB83JzPJzAoITFrw6ciRI6apuM9bb71lNsaLL77Yv06DUDU1NRMeFAAAQDipqO+Qp145asoRdL88NcVqZgnS4FNWaqJML043B6gLp2fLpcunEHgCosD+E61Det4cremQhdNzTNnbaFktCXL16qlytLrd9FXRWdZ0yvrJoH3oXthSaTIzVX1Lt1x7/jTJSI2eZtzBphmtLk+/pKdYzbGvj5ZS7jzcbN7TBdOypGSSstA0eHnlyjLZc7zFLM+fmm2Cm4GSmZZoysd924zSLF5Mbkn/OwcbTDPwvIxkOXd2btT0dBvTN9/gD5z2d8rIyJC5c+f612nvp+Rkpl4EAADRQctj/vzGMXl2U4XZGR+Y3jxOdJdIS3aSbAnm39l5A01fmcEOiB4jVccNfAeMTaI1QeZNy5bJ1tDaMySIoNfrW3sIPo3SziNNsv1Qkwkw6cxwly0vNZlGmsX2wttV4j45c6EG9a5ZPXXE3ltaErflQIN0dbukKMduytYm2mNMH2esGXijpb9h65aWyjsHtOdTv8wty6K8c5Jt3F1rTnT5PsOa5RbMSRHCMvg0e/Zs2bp1q2k03tHRIZs2bZJLL710yG2effZZczsAAIBIpgeY7Q6nbDvYKBt314nH028OQAZiT/3mzKSelJs1JVNuuXjmkAawAKLD7CmZcriqXZzugRm+tORpLFlPoTZSMDwthQD5aLR39cm2Q01DMp00E27hjBxpbOvxB56UlqlpAGqk4NNL26rN7VWD/hsnYV/Cptt5ce70UA8j6rncXtNE3p5klUTbqWzpuuZTExQo7TkXLUYdfNJeT/fcc4/s27dPtm3bJk6nUz74wQ+av9XX18vTTz8tP/3pT+W+++4L5ngBAACCSmf42bC1WmqaHHKspsOU1+kBQ1z/wHTECXHxpuzuwkVFZnYlAk9AdMqw22T9+VOlqqHLlNJqydzgSpBwp0GExeW5svNkz6dFM3JM9g3OrtflOX2d/hZoAG+EoF7qCKVvOjmFL/A0JLAw0C4ZMay1s0+e31JpMuM0m27tkmJ/6aYGjfsGbX8jbW9RH3y67rrrTMDpV7/6lcTHx8vDDz8sixYtMn/70Y9+JE888YR85CMfkeuvvz6Y4wUAAAgqzXTQwJPSs5EacEpJTJDuPo8plygvSZd/ufEcSuyAGJlpLBQlc4GyZFauLCofmKEsPj5yAmehphNJDG4Ur0HHssI0cz0/K0WWzc4zmVG+nk9T8k8vTdOggs5+2j1oNjoyz6De3t/gn6VQg5Rv7K6T91wyEJW8YFGROQGmWVGaaakl/dEirn88hcvDaOaTzWaTrKwsiTSNjQP1lIg84TKrGsITs91Frry8gZ27ieL7HWOhZRO+Phw7jzTLtkON/p3C6kaHacCqwaaLlhTLubPyQjxaIDLx/R57fIeakZQx5qPBgd3HWsTl9kh5cYYUDJsp0Dcr3Lv1cNLMp5e3V5vG9do36pJzS4aUWCE2Pf3GcWnp6PUv6z7G7X83Z8htdP9DA5jR9N0ekKkWCgoKAnE3AAAAkz6V+svbqqW5vVfS7TZzYKDToO860mx6euiO3/xp2XL16jJJSeSMNQCMljbr3nWsWauWTQaWzhQYSZITLXLe3Pwz/n00jcPzMpPlPWtnmnLuaJmxDBM3vShtSPBJS3qHi5TA01hMzjyfAAAAYdbo8+399bJpb730ubySn5VsGoy/trNW1p8/zUxlfaiqTRJM8CmLwBOAgNBeLpv31pueLzkZSbJiXkFU9o2rbuySHUdONex+50CjKVfLz4zNmdEJPGEwDcQm2SzS0NotGfZEmTc
t8irIxoPgEwAAiBl69nnzvgZ5c3edyXrSZuJaOOH19ktJnt3f30MPCnMyoqfPAoDR6+pxme+HzNRESbQGtkTq9Z21UtXYZa63dvWZ0q0LFxVLtOnodp22rtPhjNng01hpUEIDdlp6pbMuzimLjeBELJlZkmEusYTgEwAAiAnat+M3Gw7LvhOtppGnprRrHxKbNd7f+LNwWE8PALHlaE2HbNxVa4JCyTaLXH7eFNP0N1AaWnvedTlaFGQlm7I0X18k7WmTS+BpVLQ/1AtbqsTl8Zrlt/bWiz3JKqUjNDUHIgn5fwAAICbsO9EmdS3d5np8XLw4XdrTKc7s1GenJcqs0gy54JyiUA8TQAibY7+1p84fMOlxumXrwYHJBwIlM9U2bDlwga1wkp2eZHroaZNtDepfuqxUMuxDnztG1tbV5w88+TS0RWeQErGFzCcAABCV9ABy77EWae7oNWV0TpdHbCdLaJKTEqS7d2CGu/lTs0x2g06pDiB2achJy5yG94cLJJ1G/ZXtNabkLjcjSVYviN7yXs3UIVtn7NJSrEOyxlTGsKAlMJEgu2aA60yMGvw+Z0aOxMdPzmyUBJ8AAEDU2XGoSf648Zh0drukIDvZzDg0JT9VslJtJgjV3uU002a/b91MmVEcWz0XAIxMD/j1++BITbt/XaB7smiQWyc1QOQdsB+oaJM6bRCdYpNzynOCNhuZbiMXLi6SzXsb/D2fysPkd0rLUncdbTbXl8zMlamFaaEeEsYxC+XOk++hSKfpcbdmkrK+CT4BAICooTPWbd5XJ8+/XWX6OOmJ44r6TtM0uKWjT65ZPc3sPFss8TJvalbAmwkDiGznn1NoMpI6e1ymXEyD1uFAs2COVLebSRI0myhay/XC1a6jLbLtUOOQ35q155YE7fGmFaabSzhpauuR13fWmAxB9cqOGlmfMtWUWCJynKjvHLpc10nwCQAAYCxaOnrl2U0Vcqy2wzRs1ZntdHprr3dg9qrkxASzk8yOMoB3y36aOzUr7LJuXt5WLZUNA7PkbT/cJFeuLJPcDBp4TxY9iTGYvhf6vuikFbFCS9j7h22XelKH39TIkpxoMcFTn6TEyQsJ0XAcAABEhUNV7aZEIdE6sHujBwW+HeV0uy2qe6sAiF5aPuwLPCmPt18OVraFdEyxeMA+WJItIaYCTypz2KyPcSM00Ef4Wzm/QFKSBrZnzf5ec87k7RuR+QQAACJSVWOXvLO/QRJtFjlnRraZyltpNoAerGlD3/RkqyyZlSc3XjQjaP05ACCYRmoGPFkNgjHgvHn50u7oM78tNkuCaRwfa3TmwpXzCky/IN36lszKldzM2Mi+05ly959oNZ87bdCdNSwQF456+tzS1N4r9iTLkOw0Ldm96aIZJkNcg6qTuW9E8AkAAESc/Sda5FcvHjaZTlomc7yuQ65eNVWO11nF0euSGcUD/TJWLyykrxOAiJaabJX507Jl7/EWs5ySaJGF03NCPayYkp5ikxsumCHdfW6T9RSrJzO0JDXcylIno6T/+S2V4vUO5FLXNDnkujXT/dlD4ai1s0/+urlC+lweEyhcNidfFkzP9v9dWxKEYobfsHzF/vjHP8qPf/xjc/2iiy6SL3zhC3Lw4EH58pe/LB0dHTJ79mx58MEHJSUlRbq6uuTf/u3f5MSJE5KcnCzf/va3Zdo0ZpAAACCabdxd558SXRvx6pTBLZ19cu2aaVLf0m0CTjqbHQBEg/Pm5svUglTpdXpMBkqijaD6ZNOsFw0EIrbUNDv8gSelAR3d5wjnmf52HG4y41Q68ncONsrcqZkm6BRKYRey7enpkfvuu09+8YtfmCDUli1b5I033jABps997nPy3HPPSXl5uTzyyCPm9t/73vdkwYIF8swzz5i/33XXXaF+CgAAIAhnHnVGls7ugSaZmu00mM5qp9kAGnQqK0gj8AQg6uRnpZjvNwJPwORJSTw94DierKf+/n6TNaXN613ugcBQsGhfuOGPrZO
vhFrYBZ88Ho94vV7p7e011/VisViks7NTVqxYYW7znve8xwSb1EsvvSQ33nijub569WppbGyUmpqakD4HAAAQONpY989vHJeXt1fLn14/bnovmF4TGad6GMybmikzSzNCOk4AABBdphelycySgf0LPe21eGau5I2j19VrO2tN+d5L26rl6TdOSK/TLcEypyxzSEN8Hb/VEvrQT9iV3aWmpsqnPvUpueqqq0wZ3XnnnSdWq1UKCgr8t8nPz5f6+npzXf8d/re6ujopLi4edfokDfuA6GMJgy9YAIGxZX+Df9Y6t9cr2w8NTDOenZYkze29kp2eKNOK0kM8SgAAEG00iLPmnCJZPjdfNGxgtYw987CprUeO1Xb4lzWLW0+sLSrPlWAozUs1+0l1zQ6xJ1lNH8xwEHbBp/3798tTTz1lMprS0tJMKd3GjRtPu50vkqcpZMPFj6GWMTvbHnPTZE7UtZ/9Y6iHAJxVVpY91EMAME6a2VTV0CXJSRaZW5ZpejoN5lvWfgvh3HMBAABEh4lMXjJ8P2ak0rjx0KDW8bpOU4o7tyxrSHZTfmayuYSTsAs+vf7666Z8LidnYAaHm266SX7605+acjofvV5YWGiua9aTLhcVFZ32t9FoaXGQ+QREodZWR6iHgHEicBjbqhq7ZMM7Vf5Mp7rmbjPL066jzWZZf7HnxdhMOwAAIHLlZiRLflayNLT2+ANZ5SdL+carobVb/rq50h/Y0pN2V6wsO60nZjgJu+DT3Llz5T/+4z/k4x//uJnNbsOGDbJ8+XJ54YUXZPPmzabvk2ZGXXzxxeb2a9euNct6+02bNpn/ZyzBJ+1cP7h7PYDo4HaHQVc9AKOiWcx7jrVIZWOX2XnSHSdf6awGo85fWGj6O3U4nGbnTZvuAgAmRsuADlS0iSUhTpbOzpPs9FN99AAEjia7XL58ihyt6RCXxytTC9ImPHPikZqOIRlVDW090t7llKy0RAlXYRd8uuCCC2Tv3r0m48lms8nChQvlox/9qOkB9ZWvfMU0Hi8tLZVvf/vb5vbaH+pLX/qSrF+/3tz+m9/8ZqifAgAAGIO9J1rNNMCqpbNP+pwes2OmaU5aGq8HRjrDEwAgMGqbHfLajhp/lmlTe69cf8F0SU4Mu8NDICpYEuJl9pTMgN2fNeH0VkPh0FT83YTlt4sGm/Qy2OzZs+U3v/nNabdNT0+XH/zgB5M4OgAAMFGaxaTp5/Zkiymt88lJT5LKhi7TWFx3rJbPyRtXc08AwJnp9+7g2o8+l8cE/0sIPiHKM60r6rtM9lFJrj2ig60Lpmeb7PB2h9MsLy7PnXA2VbBF7qsNAAAijsvtleO1HfLGnjr/uiRbwpCzdrNKM+XSZSVmhpZ0uy1EIwWA6JWWMvS7VbvEJMTHSUtHr/ne1SwNINoCTy9vr5GK+k6zrIGnq1dNDfuAzZno+NefP818Zm3WBMlMDd9yOx+CTwAAYFJ0dDvl+bcr5UBlm3T3uqQwO8UcAPX0uaUw226aZ+qMLRctLpaiHBrPA0CwlJekS2NbjxyqbjfTx5fkpZrvZ+0howfjV6woi9iDcmAkrZ19/sCT0n2Pg5Vtpt9ZpLIkxEdUH0yCTwAAIKi6elyyeV+9aSrudHuk39sv2iOzrqVbUpNtpsH4ZctLRSdoSYjnbDsABJv201u9sFBWzi8Qb79XfvXiYX/zYv3O3nawUS5cXBzqYQIBM6g397uuwymHq9rlQGWr2TdbOidP8jOTZSIIPgGISnc+uEHCwaN3rQv1EICQp7m/sKXS9CTQPk/dfW5JS7GaQJPu9Onf50/PocQDAEI0C5fbNTADuAzrAQVEk+z0RCnNSzV9knwl/3PKAtcAPNpUN3bJxt21/mXdl7vxwhkT6pNF8AkAAARNr9Pjb4aZZreZ4JPXK2Y2Ow1CXbio2JR7AABCQ/vFDD4oVzOKM0I6JiAY2X6XLC2RE3Wdpv9kaZ5dUpIoLT0TnRRmMH3NJjopAcEnAAA
QNNrDSc8uahAqwz5QYpeVliiLynNk/rQsyuwAIAxcvKRY9h5vFUevy8wCVlaQFuohAQGn+yDTi9JDPYyIkJ46bFKCuDhJm2AfOIJPAAAgqDt6lywtldd21Eh3r9sEnDTbSWe1AwCEBy191pMCAKDKizOkpaNPDlS0mu+H8+blT3gGYoJPAAAgqLRB5c0Xl4d6GAAAABil8+bmm0ugcNoRAACMi9b/e7SBEwAAQIxyuT2nNe3H6ch8AgAAY9LV45Rn3qqQ2iaHZKQmmrNiC6Znh3pYAABgHJraeqSupVvsyVaZVphm+vvg7PQE3Kvba6SiocvMHLl6QaHMLKFZ/5kQfAIAAKOmzWgff/6gVNQPzIo0MJNdvxTlpEh2elKohwcAAMZAZzncsLVa+vsHMnfqWjJNEAVnt+9Eqwk8Kc18emN3nRTnpDCL3hkQfAIAAGd1vLZDjtR0SGNbj7R3ufzr+1we6e7ziKPXLdlMIAMgQul3mU6QwGQIiLTMm52Hm6Wtq0/yMpNl/vRssx2Pxf4Trf7AkzpU2WYymrXJNN6do8c9ZFlfx+4+N8GnMyD4BAAAzsjb3y/PvHlc3txTb5aTbAnS4/RospP49m8TrfGSm0HWE4DIoweLmq1wuLpd9Ctt0cxcWTIzN9TDAkZFt92jNR3mumbgaBB12ZyxNYjWcrEh4uL8v+94d8W5dtlf0epftidZJcOeGNIxhTPCmQAA4Iz0DOiBynb/ck+fWyzxcZKTkWR2sqbkp8oNF0yX5ETOZwGIPMfrOk3gSWnux47DTSbDE4gElSdLvs60PBqLy3PFZknwLy+dnSsJ8YQJRkP3gS5aXCwluXaZXpQuf7diCtmT74I9RQAAMER3r1ssCXFisyb4r/toE9L8rBS55NwSyUpLNL2eaEwKIJL72A2n33tAJEhJtEi72zlkeaz0ZNL1F0yTxrZesSdbJDcjOcCjjG4adNJLMPX0uU2GWqL1VJAwEhF8AgAA/t4RL22tluomhyk/WTY3X0ry7JKZmmgOxrp6XKYHxJqFhcxuByAqFOfYZVt8k3+adD24y8ukjBiR4YJziuTFrVXS6/RIarJVVo6zUbj2KJpaSJ+icGx98OqOGjlR1xkVZcEEnwAAgLG/os0EnpQehr2zv0FuuHCGXLqs1JTfaW+Uc2bmSh5nRQFECZ2l87Jlpeb7LyE+Ts6ZkUOzYESM3MxkuWVtufQ5PZKUaBlzs3GEtyPV7SbwNLgsuDTPHrHZaQSfAACA0TOs1ER3dHqcbinNSzUXAIhGRTl2cwEikfZnSkmiz1A06uk7vQS4t88jkYqtFAAA+BtnDj5nqin82WnM2gIAADDZSnJTh2SzJdsskhvBZcFkPgEAAKMgO8WU2OnMTzpby6LyXLEOmgEHAAAAkyMnI0kuP2+KHKw8VRacZIvcEE7kjhwAAARcSV6quQAAACC0CrNTzCUaUHYHAAAAAACAoCH4BAAAAAAAgKCh7A4AgCi053iLmZ430Zog583Nl3S7LdRDAoCo1dzeK1sPNorb45VZpZkyszQj1EMCgLBC8AkAgChzqKpNtuxv8C+3dfXJ9RdMF0sCCc8AEGi9Trc8v6VS+lwDU6A3tPVIUmKClNI/DwD82AsFACDK1LV0D1nu6nGJo8cVsvEAQDRr7ezzB5586od9DwNArCPzCQCC6M4HN0g4ePSudaEeAiZResrQEjvNeEpO5CcfAIIhNdkqcXFx0t/f71+XNux7GABiHZlPAABEmYUzsmVqYZo5GEqyJcjFS4rFZk0I9bAAICppoGnNOYViTYiX+Lg4mT0lU2bR8wkAhuA0KAAAUSYhPl7WLikxZ+E1AAUACK7y4gxz4XsXAEZG8AkAgAihsyjtPtoind1OKcqxn3U2JQ6AAGBy8b0LjJ/u3+w+1iJeb7/MmZIpuZnJoR4SAojgEwAAEeLVHTVS2dBlrh+t7RCXxyvzpmaFelgAAAATnjXy2U0V0tPnNsvHajvkmtXTJCs
tMdRDQzT3fNqwYYPcdNNNcuWVV8o3vvENs27z5s1y/fXXyxVXXCH33HOPuN0DG2V9fb184AMfkKuuukruuOMOaW5uDvHoAQAIPI/XK1UnA08+J+o6QzYeAACAQGlo7fEHnpTH2y/VjUP3exDZwi74VFlZaYJL//3f/y1PP/207N27V1544QX5whe+IN/97nflueeek97eXnnyySfN7e+9914TqHr22Wfluuuuk/vuuy/UTwEAgAnTlPO39tbJ7149In/dXCGOHrdYLUObhmszcQAAcGaOXpc8v6VSnnrliGzcVWtK2BF+RtqnSbJRqBVNwi749Pzzz8vVV18thYWFYrVa5eGHH5b09HQpLS2V6dOnmzrqW265RZ555hlxuVyyadMmWb9+vfl/b7jhBnn55ZfNegAAItmOw01yoKJNOrtdUtfSLS+8UyUXLCoSS8LAT3dailWWzckL9TABAAhrL22tlpomh3T1uORwdbu8c6Ax1EPCCPKzUmTBtGz/cllBmswoSQ/pmBBYYRdKPHHihNhsNvnwhz8sDQ0Nsm7dOpk1a5YUFBT4b5Ofn2/K7dra2sRut5sglbJYLJKamiotLS1Dbv9u4uPjzAUAopnFEnbnGnAWTe29pzXhzM9KlvesLZcep0dSky1mVjsAADAyb3+/tHQM/T1tau8J2Xjw7pbPzZf507JMyV1qspUG/lEm7IJPHo9HXnvtNXn88cdNYOlf/uVfJDn59C73uiF6vSOnTMaPYWc8O9vORg0g6mVl2UM9BIyRNtisaXb4l1OSLGKzxJvfLJuVcjsAAM4mPi5OMlITpa2rz7+OBtbhLSVpILEE0Sfsgk+5ubmyevVqycnJMcuXXnqp/PWvfx0SIGpsbDRlednZ2dLV1WWaj2vWk/7rcDgkMzNz1I/X0uIg8wlA1GttPRXECHcEygYsmZVrGm9WNznEnmSRNecUcbIEAIAxWntuiby+s8aUsRdkp8h5c/NDPSQgJoVd8OmSSy6Rz3/+89Le3m5K6F5//XXTA+qRRx6RI0eOSHl5uTz11FOydu1aU263YsUK05j8xhtvNP/qsq8Mb7QNXfUCANHM7aa5ZqTR3k4XLi4O9TAAAIhoGXabXLN6WqiHAcS8sAs+LV68WD7ykY/I+9//ftM4XLOg3vOe98iMGTPks5/9rJnpbtGiRXLbbbeZ2+vMeHfffbf85Cc/kYyMDHnooYdC/RQAAAAAAABwUlx/f39Mp/00NnaGeggR584HN4R6CADG6NG71kmkyMtLC8j98P0OAOGF73cAiN3vdqbJAQAAAAAAQOyU3eHMyDgCAAAAAACRhswnAAAAAAAABA3BJwAAAAAAAAQNwScAAAAAAAAEDcEnAAAAAAAABA3BJwAAAAAAAAQNwScAAAAAAAAEDcEnAAAAAAAABI0leHcNAAgXdz64QcLBo3etC/UQAAAAAEwyMp8AAAAAAAAQNASfAAAAAAAAEDQEnwAAAAAAABA0BJ8AAAAAAAAQNASfAAAAAAAAEDQEnwAAAAAAABA0BJ8AAAAAAAAQNJbg3XX0uPPBDaEeAgAgQBy9Lnljd510OJySn5ksqxYUitXCuRgAAMJNQ2u3bNrXIC63R2YUZ8iSmbmhHhKAcSL4BACIKa9sr5HGth5zvavHJfHxcbLmnKJQDwsAAAzS5/TIi+9Ui9PtMcs7DjdJarJVZpZkhHpoAMaBU70AgJjS3N47dLlj6DIAAAi9jm6nP/B0pt9wAJGD4BMAIKZkpSW+6zIAAAg9zXKyJgw9XOU3G4hcBJ8AADFl7bklUpCVIkm2BCkrSJOV8wpCPSQAADBMcqJFLllaIhl2myTbLLJwerbMKqXkDohU9HwCAMTcmdQrV5aFehgAAOAsinLscsOFM0I9DAABQOYTAAAAAAAAgobgEwAAAAAAAIImrr+/vz94dw8AAAAAAIBYRuYTAAAAAAAAgobgEwAAAAAAAIKG4BMAAAAAAACChuATAAAAAAAAgob
gEwAAAAAAAIKG4BMAAAAAAACChuATAAAAAAAAgobgEwAAAAAAAIKG4BMAAAAAAACChuATAAAAAAAAgobgEwAAAAAAAIKG4BMAAAAAAACChuATAAAAAAAAgobgEwAAAAAAAIKG4BMAAAAAAACChuATAAAAAAAAgobgEwAAAAAAAIKG4BMAAAAAAACCxiIxrrGxM9RDAAAMkpeXFpD74fsdAMIL3+8AELvf7WQ+AQAAAAAAIGhiPvMJABB8LrdHth5sko5upxTlpMiCadkSFxcX6mEBAAAgwrjcXtl2qFHaHU4pzE6RhdPZr4wEBJ8AAEH38vYaqWlymOv6r9fbL4vKc0M9LAAAAESYV3fUSFVjl3+/0uPplyWz2K8Md5TdAQCCytvfL7UnA08+VY1DlwEAAICz6e/vl+rT9isHAlEIbwSfAABBFR8XJ8mJQxNt7Ukk3gIAAGBstLwuZfh+ZbI1ZOPB6BF8AgAE3UVLiiXRmmCuZ6Ulynlz80M9JAAAAETofmWSbWC/MjM1UVbMY78yEsT1a95aDGOqVgCYvPI7l8sriSd3Fs6EqbgBIDrx/Q5gsvcrET7f7WQ+AQAmrfyOHQQAAABMFPuVkYfgEwAAAAAAAIKGjq8AAABh7M4HN0g4ePSudaEeAgAAiFBkPgEAAAAAACBoCD4BAM4qxuemAAAAAMaMfehTKLsDAJxRbbNDXttRK70uj5Tm2eWixcViSeC8BQAAAHAmLrdXXt1RI9WNXZKcaDH70AXZKaEeVkhxBAEAGJHb45WXtlVLj9NtztpUNnTJziPNoR4WAAAAENa2H26SqsYu0byn7j632af2emM7C4rgEwBgRL1OjzlrM1iHwxmy8QAAAACRYPg+c5/LYy6xjOATAGBEKUkWSUuxDVlXGOPpwgAAAMDZFOYM3WfOsNskyZYgsYyeTwCAEcXHxcnfnTdF3t7fID19binLT5W5U7NCPSwAAAAgrM2fmiUej1eqGh2SkmiR8+blS1xcnMQygk8AgDNKTbbKJeeWhHoYAAAAQMTQQNOi8lxzQZiX3XV1dcm1114rVVVVQ9Y//vjj8oEPfMC/XF9fb5avuuoqueOOO6S5mWa4AAAAAAAA4SIsg0/btm2TW2+9VY4dOzZk/eHDh+VHP/rRkHX33nuv3HTTTfLss8/KddddJ/fdd98kjxYAAAAAAAARFXx64okn5J577pH8/Hz/OqfTKV/96lflU5/6lH+dy+WSTZs2yfr1683yDTfcIC+//LJZDwAAAAAAgNALy55PDzzwwGnrvv3tb8vNN98spaWl/nVtbW1it9vFarWaZYvFIqmpqdLS0iIFBQWjeqz4+DhzAQAAAAAAQIwEn4bbuHGj1NbWyt13320ynXy8Xu+It4+PH31CV3a2Pea7zgMAAAAAAMR08OnPf/6zHDp0SK6//nrp7u6WpqYm+eQnP2myobQxudvtNllP+q/D4ZDMzMxR33dLi4PMJwAII1lZ9lAPAQAAAECsBZ8Gl+Fp5tN//ud/yve//32zvGLFCnn66aflxhtvNP/qsq8MbzS83n5zAYBI19/fL1sPNsnRmnaxWuJl5fwCKcohkAMAAIDJ09rZJxt31Up3r1tK8uyyakGBJIyhOgnRKeK3AG1M/sc//lGuueYa+e1vfytf+cpXQj0kAAiJQ1XtsvtYs3T3uaXd4ZSXtlZLr9Md6mEBAAAgRnj7++WFdyqluaNXepxuOVzdLjsPN4d6WAgDYZ35tGHDhtPWrVy50lx8ioqK5Oc///kkjwwAwk9LZ++QZZfHK53dLkmyhfVXPQAAAKJEn9NjMp4G00AUEPGZTwCAAbnpyUOWtfQuPcUWsvEAAAAgtiTZEiQ1eWgbnNyMofuoiE2cDgeAKDGzNEO6elxypKZdbJZ4WTG/QBJtCaEeFgAAAGKEziR/6bJSeXN3nTh63VKaZ5dF5TmhHhbCAMEnAIgiS2blmgsAAAAQCpmpiXLVqqmhHgbCDGV3AAAAAAA
ACBqCTwAAAAAAAAgagk8AAAAAAAAIGoJPAAAAAAAACBqCTwAAAAAAAAgagk8AAAAAAAAIGoJPABAi/f394u3vD/UwAAAAworXy/4REG0soR4AAMQal9sjL2yplMM1HZKWbJMlM3NlyazcUA8LAABgUgNMcXEicfqfk9wer7y6o0aqGrrEZk2QCxcVSUleakjHCUQ6j9crx2s7zeerrCBNkhNDEwYi+AQAk6imqUt+/PReaWrrlYSEOMlMTTTZT3lZyVKSaw/18AAAAIKe+f3G7jo5Ut0uCfHxsnJBgcwsyTB/23WkWSobusz1PpdHXtleI++5ZKZYLRTsAOMN8j7/dpXUt3ab5Z1HmuWa1VMlJckqk41PMQBM4s7W028cF0eP2yx7PP3S2e2Urh6XdDqcoR4eAABA0B2sbJPD1e2ihXVur9cEonR/SHWc/NfH5fFKr3NgvwnA2DW09fgDT6q7z20+f6FA5hMABJHL7TVnGBy9LinMThan22synkQzzPsHzkbEx4nkZyWHeqgAhrnzwQ2hHgIARJ3ObtdpJ+ccvW5JS7FJUY5djtd1+v+m6+yTmKFxrLbDZF6lJFpk8cwcsVoSJu2xgWCIG8PaYCP4BABB9NLWKqlt6fbv0KQlW8WeZDE7Wn1Oj2SlJ8m150+T7PSkUA8VAAAg6IpyUmTP8Rb/cpItQbJSE8312VMyTbZTRV2n6UuzfG6+xOtZukmg+2nab8qnsa1HrlxZNqQnFRBp8k+29qhucpjl1GSrzCodKHOdbASfACBItFeBL/DkU5CVItML06W62SF5GUmy9twSzqoBAICYoQ3EL1hUJIcq200vp6Wz8yTRdmpfaMG0bHOZbIMzrnzlSj197pD0xgECRYOn65aVmib+Hm+/FOfaJdEammMPgk8AEMBg047DTdLd65Yp+akyvShdLPHxpp+BT7rdJmvOKQrpOAEAAEKpvDjDXMKJZmANphlXnCDEyC01mkz5qGbxzSnLknAXHxdnZrkLNYJPABAAWkb3wpZKaWrvNcsn6gfOnumZvdd21pgzDRl2m5w7KzfEIwUAAMBwS2bmmlK71s4+E3has7CIWfZwGi3NrGrs8u/va5nowuk5oR5WRCD4BAAT0NLRK2/uqZMOh8v8EOVlJouvNUBFQ5dccm6JFOXMlF6XR1KTrJPWtwAAACAQtPxs0546M2mKZnXribRo7IOkPabWnz/NzEKsZUmhKk1C+NKJgqpPBp58tEE9wafRIfgEAOPk8XrlhXeqTD8A/TFqd/SJJSHO3zw8OXFgp8VmTTAXAACASGsp8OKWKnG6PWZ519FmM3FKJJQajbc8KT3FFuphIEzpSWTdp9fPxeCgJUaHPEIAGKeePo8JPPl+jAqzU8TpGujvlJOeJOfOzAvxCAEAAMbP0ePyB558Wjr6QjYeINQuWlws1oSBMIoGKpfPyQ/1kCIGYToAGCfNbEq2WaTHORCASkuxyfkLi2ROWaakJFqiMiUdAADEDnuyVWyWhCEBqOz0xJCOCQglnS3uvetmSq/TIylJFpMth9Eh8wkAxikhPl4uXVZqyuw02DRnSqYsmZUr9iQrgScAABDxtO/RumUlkpWaaPZ1FkzLltlTMkM9LCCkLAnxkppsJfA0RmQ+AcAE5GQkybXnTwv1MAAAAIKiICtFrrtgeqiHASDCkfkEAAAAAACAoCH4BAAAAAAAgKCh7A4AdCphp0dqmh1m9oqSPDs9mwAAAIAo1uFwSmN7j5m1Li8zOdTDiXoEnwDEvO5etzzz1glx9LrMcllBmqxdUkwACgAAAIhCtc0OefGdKvF4+83y8rn5pqE+goeyOwAx72Blmz/wpCrqO6W5vTekYwIAAAAQHDsON/sDT2r7waaQjicWEHwCEJP6+/tHvO7jneTxAACAyDDSfgOAyOaVfj7bQUbZHYCYcrSmXTbvaxCXxyuzSjJk5fwCmTUlUw5WtUmv02NuU5Rjl9yMpFAPFQAAhFmZzus7a83
+QllBqlywqEgS4gN/Ll8PgCn9B4Jr/rQsaWjr8Qeczpmew+cuVoNPXV1dcuutt8ojjzwipaWl8vTTT8uPf/xj87eysjK5//77JSMjQ+rr6+Vzn/ucNDU1SV5enjz88MOSk5MT6uEDCMOGgn97u1K2H26URGuCCTAdqGyT7PQkmT0lU9afP00q67vEaomX6UXpEs+PDwAAOMnl9spL26rNv+p4Xadk2BNlyazcgD1GU1uPvLKjRhy9binKTpGLlxSLzZoQsPsHcIr2eL16VZk0tvVKWopVSvNSQz2kqBeWZXfbtm0zgadjx46ZZQ0wfetb35LHHnvMBKFmzpwpP/jBD8zf7r33Xrnpppvk2Wefleuuu07uu+++EI8eQDjpc3nktZ018j9P75X9J1rF6+03ZyzrW7v9QSllT7LK3KlZUl6SIfHxBJ4AAMApPU63P/Dk09E9sA8RKBrc6upxmUwMnYH3nYONEu7qW7rluU0V8pc3j8uRmvZQDwcYk9yMZJk3NYvAUywHn5544gm55557JD8/3yxr+tvXvvY1yc4e6D4/b948qa2tFZfLJZs2bZL169eb9TfccIO8/PLLZj2A2ObxeuVvb1fIQ7/aJi9vrZbmjl7p7nOZYJTy7UDmZzGtKgAAeHf2JIukJluHrCsI4D6E7pd097mHrPOdIBsLt8crb+6pkz9tPCav7qiRvpMtBYJBJ2t54Z0qc0Kvqb1XNu6slbqWgZN7GLu2rj5zorSyoSvUQwFip+zugQceGLKsQShfIKqnp8eU333gAx+QtrY2sdvtYrUO/BBYLBZJTU2VlpYWKSgoGNVjaYYDWQ5AdHF5PPKbDYdl77EWaXc4RT/hCZZ4SbFZJCXRKsmJCZKVlmh6NcwoyQj1cAEAQJjT3k6XL58ib+9vkJ4+tynZmVOWFbD717J/bQXQ0nFqtt2C7BRzMk1bAYy2F83mffVyqGogA6m1s8+cdNNxB4PODKzBLh/tnNPQ2i2F2SlBebxopgG859+u9M++Nrcsy/QlBaJJWAafzqS1tVU+9rGPyfz58+XGG2805XgjiR9D47/sbDuNxYAo0dTeI7/bcFgqGzqlsq5TLJZ4SUiIF4/HK5b4eMlIS5R0u00uPLdU1iwqNjt6AAAAo6H7EJcuKw3a/V+6tFTe3l9vSu8Ks1KktbNXHv/bQbEkxMuac4pkamHaWe+jobXnXZcD/XrocdTgGcK0D1asNqPXYF9OepIJGo7V3uOt/sCT2l/RKktn54rVQs8vRI+ICT5VV1fLhz/8Ybn00ktNg3GlZXjamNztdpusJ/3X4XBIZmbmqO+3pcVB5hMQBfTs22PP7ZeGlm5zFq7X6ZZ+p0hqklV6zE5RvyyYliWXnTfF9Hfq6gzezhgmJivLHuohAAAw6VKSLHLxkhJzfdfRZqk4PlB+pTP0av/K/KxySU5898O3jNREk/XtX7bbgjbezNREOX9BockG056ac6dmjipAFm32nWg1GWdKjypXLyyUWaWjPx71/X+jXQtEqogIPvX19ZnA0/ve9z750Ic+5F+v5XYrVqwwTcg1E0r/1WVfGd5o6BelXgBEtj3HmqW7121Svn3ltPqTHRcvkp+ZLLdeOstfYuce1jAUAAAgnHQOa2auWTFa7ne24NOq+QWmz1Nje49k2m1y0eLioI5zZmmGucSy3cea/dd1P3T30ZYxB58WTs+WmiaHCTQOLOeQoY+oE9Dg09GjR01j8K1bt47Y9Hvfvn3jut8//vGPUlFRIb///e/NRWnpnfaG0sbkd999t/zkJz+RjIwMeeihhyb8PABEHk371magurPmuz6tKE0uObfU9GVIZKpiAAAQIYpy7P7eTUqztrXM7Ww0OHXlyrIgjw6DaU+uwcbT0SU3M1muXTPNlElqBpy+/0C0iesfXKQ7QdoEvLm52WQopaWdnnKp2UnhprGxM9RDADAG2v9gw9Zqc2ppwfRsmTt1oNmnppg/+9YJM+VvZ49LinPs8p5
LyiUtJXjp5giOvLzApOzz/Y6JuvPBDaEeQlh59K51oR4CIhzf72NzsLJNjtZ0SKItQZbNzhtV8ClQXG6PmcFOT95pI/RwcaCiVWqbu83+3eKZOaYfVqgdqWmXjbvqTO8rPQF68eLimCw/RHTo7HZKZ7dLMlNtkpJkDeh3e0Azn3bs2CG/+tWvZMGCBYG8WwAwtAnn//7toJmOWM8y7TneLDddVC4LZ+SYngbXrZluZgtJsiVwxggAAES02VMyzWWyOXpd8tymCtP4XC0qz5FzZ+VJOJS3vXOg0b/c1tUX1Abwo1VenCFZqYnS2tUn2WlJZkZlRL6qxi5p6dAm8olSkpcqseBEXae8urPGtCWyJsTLpctLpSArcLNXBjRUnJWVNaZ+SwAwWtrI8fevHpOeXrfp2eTt75eG1l45XNXmv42mKU8vSifwBAAAME7as8gXeFI7j2hfzdNbqky2yvqBBuw+1Y1dZn8wHGh2mAlCEXiKCnuOt8iL71TJtkON8sI7VaapfCzYtK/e3w9b+48NDvYGQkAzn26//Xb5zne+Y/oupabGRnQQQPA4elzy8vZqM5NdRUOXmcXOdBHvH2i8GZ8QxxS0AADAr6G122RI52cls48wTmZ/67R1oQ/yDG+2ruWIw/stARPJ+tl6sNEENJvaeyTJdmp723u8ReadbPURS5999wjfBWETfHrjjTdky5YtZsa5nJwcsdmG1iW/+OKLgXw4AFFMU6l/8dwBU0anNfTOk6V2Wtvv+yLMTLPJxecOTEkMAABi25u76+TgyYxo7Ql01cqys84Oh9PNKs2Qo7Ud/gwIzShPSwl9dcvyufnS2tknHd1OUxJ04aLgzuSH2KHb1Ss7aswxh9K+YlpuplUVKiE+NoKcc6Zkyu5jLUOWAymg38bLli0zFwCYKE3x9qV8a/NGpYGnmSUZpgnenLJMuemiGWJjFjsAAGKeHjz6Ak++prn7K1rDoldRpMnPSjGBu8qGLpP9MXtKhn9fLJRSk61y/YXTpbvXbfp7hkOzcUTPSe/B87DlZiSbE9/a7Sg+Pk6WzcmXWLBsTr5kpSVJe1ef5GUmS2l+avgGnz7+8Y8H8u4AxDCPx2vSqbXppdLZVvRM3LqlpZKTkWSmHAYAAFAe7+nlIZ4wKBWLVHrwrZdwo1nwGoQCAikzNdEEWH0BKM30O39hoTn+0L9N5kyToTajOD1o9x3wPNTdu3fLT3/6Uzl48KBYLBaZOXOmfPCDH5RFixYF+qEARDGd3eVEQ5cJQjl63ZKfmSzXXzCDRo4AAGDEhs/a56mhtccsa1nWzNKMUA8LQATQ44sLFxXJtkNNJgC1YFq2zCqd/Jkmo11Ag0+bN2+WO++8U2bPni1r1qwRr9crW7duldtuu00ee+wxSvIAnNYUVANLBVnJkjIsk0mnNL1m1VSpbnJIapJVykvSwyLlGwAASFhmxFy+fIocqmoXl9sjUwvTJSMCshV0rDVN3ZKQECfFuXYaaAMhojNm6wUREnx6+OGH5eabb5Z77713yHpd/u53vyu//OUvA/lwACKU9mF4Y1edHK/rkKREi9gsCXLFiinmrOVgWmusFwAAgLPRHkCRNCNVn8sjz751QtodTrNcmpcq65aWcLINiGB1Ld3S0+ce0rAcAwLapW3v3r1yxx13nLb+9ttvN+V4AKCz1/3htWPy6s4aqWjokpbOXnG6PbLraHOohwYAADBpDle3+wNPqqqxyxy4AohMm/fVy183V8irO2rkTxuPDfl8I8DBp6ysLGltbT1tfUtLi9hs4Z/2CiD4dhxuFpfnVFPQ5vZeU1s9aIIJAACAqNfvPX3nxzvCOgDhT7Od9p1oHZLZuJuT68ELPl1yySXy9a9/XY4cOeJfd/jwYfnGN74h69atC+RDAYigqUv1zJ72d1IaaEqIj/PPGqFBJ53CdG4EpckDAAAEYlap5MRTZTm5GUlSkK2TuyOaAxRHatqlsqHLP7MaooN3hPeTt3iogBY
hfvrTn5Z/+Id/kPXr10taWppZ19nZKXPnzpXPf/7zgXwoABFAm4Vv2FrlP4u3fG6+zJ+WLfWtPVKYlSL2JIvpb3DR4uLT+j0BAABEM51sZf3qqXKirlMSEuJNMEr7ViF6e54++1aF9DjdZlmbW+s+MKKDPclq3tNjtR1m2RIfL3OnMmNe0IJPGRkZ8uSTT8prr70mhw4dMtHcOXPmyAUXXCDx8XyRArFCv3T3V7TKwcp2SbTG+8/qbT/UJO+/fLZcvarMBKDSkq1SVjAQqAYAAIjFANS8admhHgYmwf6KNn/gybe/vKg8RzJTE0M6LgTOhYuKZEp+qslwK8m1S8YkvrftDqds2d9gyv2mFaaZE/7hJuDt1zXIdPHFF5sLgNhT39Itr+2oEc116nQ4pcml0x2nidUS708vzs1INhcAAAAgFoxUZkdZVnTRmSo1+2myuT1eef7tSnH0usxyY1uPmU18ZmmGRFXwad68efL6669LTk6OKa97t6lB9+3bN9GHAxDGdc7xcXFmNjvf72hWWqLUNDtMBF6DT3p2BwAAAIg1c8qy5GhNh9kvVpr9n5lqi8rjAUyurh6XP/DkozNnRl3w6f777/f3d9Lr7xZ8AhB9tGHixl214nR7ZWpBqpTm2f1/S02xylRLmpw7K09K8uxSnHvqbwAAAECsyLDbZP3506S6sUts1gRTGRAtx86a1fXW3no5VNUulvg4WTm/QMpLwivwEc2SEy2mX5xmQPmk260SbiYcfLrxxhv912+66aaJ3h2ACLHlQIMcqW6XuuZuyUwbqGc+XtdpGocvmJYt+ypazZmP1ecWyuwpNNsDAACIdg1tPdLc3muy3wuHzdxX0+QwfWnyMpPCqv2CNgLvcXokKzXRZOoHU2qy1WRARRud2fpgZZu57vL0y8bddZKXlSzpKdGV2RWuEq0JcvGSYpMQ0OcaSAhYMD3Kez45nU559NFH5aqrrpKpU6fKl770JXnmmWdk6dKl8tBDD0lWVvR90IBY09LRIz/84x45Ud8l8XEiGmCfkm/3Nw7vcDhlzTlFsmxOXtSczQEAABhOZ6nTDHCdvXfhjJygBy7eTWtnn8l6yElPknjdQQsBPSmpB7++9gsr5xXI3KkDx387jzTLtkON5rruH168uNhk/oTa3uMtpkmzjjktxSpXrigzTeAnyuP1mucZKyVoHQ7XaZlQjh4XwadJVJqXKn+/bpZ57cP1GCyg35AaYPrZz34mXV1d8uqrr8rvf/97+ad/+idxOBzyzW9+M5APBSAEnnz5sHzxx5vkcHWHuNxecbq85guutrnbf5vCnIGzXOH6pQcAADBR2rvn5e3VcqSmXXYebZYX36ny/02DQG/urpM/vHZUXtpWbWa+CqY399TJnzYek2feOiHPba4w+2ihsOtosz/w5Fse6bruO+453iKhpr2XfIEn1dntkh1HTo1zvDbtrZfH/3ZQHn/+oAluxYLC7OTTMnFCOYufbmODS9BiSVwYH4MFNPPpueeek+985zuyYMECueeee2TFihXy//7f/5MLLrhAPvKRjwTyoQBMouO1HfLz5/ZLVUOXeAftVehVPblmSYiT/MxkmVaULuXF1HcDAIDodryuY8iyTriiQSbtvfL2/gY5WDVQgqRlZn1Oj1y5siwo42hq6/GXO/lmudLH1hYIIT/oHbQ4/HA4HA6PNTgxfLK5iQbujtV2yP6KVnO939tvgltafqhtKSbDgYpW2V/RJgnxcaYKoShncvqtluSlygXnFJltT3sPLZ2VZz4LoaA9tV7dof1oPeb5X3JusVgtCSEZC4YK6BbR1tYm5eXl5vrGjRvl7//+7831zMxM6e3tDeRDAZgEuiO152iLbNhWJV3drtN+oJWmdl+2rFSuWjU1BCMEAACYfEm2hNP2h3xldxoAGmz4ciDphC/DhSrzacnMXHllR81A2Y+InDszz/+3c2fnyeZ99f7XKhxmQE5JtJjgRG2zwyzrmGcUp0/oPjV
7arD+kzORTUbwSYMu2vTbZ8PWalk6O1c83n4pyrZLTkZwx6ANxkfbZLylo9dsu1omGshyVQ0ovrK9Rlwns570vd1xuFmWz80P2GMgTIJPZWVlsmvXLmlubpaqqiq58MILzfoXXnhBSktLA/lQAIJMz9r86oVD0ut0mz4CehZDT2j1n4xA6XWdteM9a8tl9cKiUA8XAABg0iyZmScNrT0ms0mDKecvLDT7SkrLjXTfySczNXh9b/Iyk83+mI5D6RimhaiXkvZwutY+zQQW9DUYHOyYNzVLcjOSTG9Q/TcjhCVZgzO11i0tkf0nWqW7zy1T8lMnnCmk7SfiDseZAJzSwIo+38nQ2DY02aOmqUvau/rEnmyV+LgmWbesVErCYOZpzQbzlV3qtnvFirKAZUlpKaUv8OTT2TM0IIgoCT794z/+o/zrv/6rxMfHy6pVq2Tu3LnyX//1X+Zy//33B/KhAATZhneqzNkD3aHSi9vTL/ZEq5kNJNEaL6sXFsr7Lp0VM40UAQAAfFKSLHLtmmnS1eOWJGuCJA7KhNJG23oQrMEpPbi+aHFx0MahwY2rVk6VfSdazX6bZp6EsteOznKnlzMFyvQSTjRYp83iA0XbUKxdUmxK37Q1hWaDBaKB+WgMft29/f3S3uWUzLQk/7IG2UIdfNLg4+B+Xxo03Xu81ZQIBoIGsXT7b+s6FfwtPtmPFlEWfLrhhhtMwEmzni666CKz7pxzzpGf/vSnsnr16kA+FIAg8/V20uBSWrJVunvdkpuZZHZqNNuJ2mkAABDLEuLjTXBpOA1EXb58yqSNQx9vyazcSXs8vDudAdo3C/RkZ54tLs811Qt6brgo1y62QSVtIZoEcYiRmoAHsjG4HrdcvrxU3jnQaHqwleanypyygRkXEXoB7wKmwSe9KJfLJVlZWTJjxoxAPwyAAPF6vfK7V4/Kgco28wO1fs10mVeWZc7a/eXN4yZ11WZNkKWz8+TWy2aHergAAABnpX2P3tpTJ3Wt3Wa69zXnFElq8uRkoAChokFIXyBSZxjcerDRnyF3Toj6bGkJ4pYDjXKirtNUT9iTLOLoHZgBUhujl5dMrM/WcJppdmEQsw0RJsGn2tpa+dKXviSf/vSnZfbs2XLLLbfI4cOHJSMjQ37+85/LvHnzAvlwAAJAmxFuO9RkrneLyG9eOCSffd8SOW9evmSk2uRoTbvkZiTTqA8AAESMdw40yNHagRnpNHv7pa1Vcu2a6aEeFjBpzpmRI0XZKSbQo9UL9kkq/xtu74lW2Xuy1M7RK+Zkt5YjaiN07U82WTMBIsqCTw888IB0dnZKdna2PPvss1JTUyP/93//J7/73e/kW9/6ljz66KOBfDgAAVDTNDDDh49mOjW29khaik1mT8k0FwAAgEjS3DG0+bI2ADezsNGrEjEkNzNZQl2Q2dw+9LOos9xNK0ofsWQV0S2gwae33npLHnvsMTOz3UMPPWRmu1u6dKkpvbvpppsC+VAAxqHN0Sff/+1OaWrvlbQUq9z+d7OlKCfFlNz5WBPiJS8rvJpBAgAAjIVmU+j+jk9mWiKBpyjT5/TIwao28Xr7TU/SwWWVtc0O04RdewAtKs8Jy+yaxrYeeXN3nfQ6PaZfk1YdRONEPtnpiXKs9tRyki3BlN7hlCPV7bL9cJNpDL9oRk7U9qkK6LuuPZ60xE7PKrz55ptm5jtfTxmLhQ0MCBX9TG7YWiW/e+WI9Lq8oq0He51uefQv++SeD50nLR195sdbezutP3+ayXoCAACIVMvn5Ju+T3Utvp5PhaEeEgLI5fbIM2+dkI5up1nWJtu6D6ulZS0dvfLCO1UmKKVqmh1y/QXTQ1Z2NhLdNl98p8rMiugbvz3ZIgunh6YvUzDNn5Ytjh63VNR3msDT6gWFZpbBserqccnh6nbTOH1WaaaZ2S4aNLf3ysZdtXJyrid5a2+9mbGvIDtlxIDr1kON5rUozrHL/GlZERVUD+g7Nn/+fHnyySclLy9
POjo65OKLLxan0yn/8z//429CPlpdXV1y6623yiOPPGIyqTZv3iz33Xef9Pb2yqpVq+QrX/mKCWjV19fL5z73OWlqajKP+/DDD0tOTvR9aIGJ+MubJ+TVHTXS6/SaLzb9mUvw9psa8JbOPvn7S2eFeogAAAABow2WL6LpcNSqbe72B56UZg9pQ2sNdNS2dPsDT75AT0Nrj0wvCp/gU3ef2x948mnt6JNopNlcK+cXmMt4dfe65Jk3T0iPc6BR+aGqdhNsTLRG/uzbrV19/sCTf11n34jBpw3bqsy27Gud0i/9ERWwHHvI8V184QtfkF//+tfyta99TT7ykY9IYWGh3H///fLiiy+aANFobdu2zQSejh07ZpY1gKX3/d3vfleee+45E4DSIJe69957TUmf9pi67rrrTIAKwMCX9LHaDqlv6TZN/jQoPjwwrjtm2kwcAAAAiBQJI2TO+LJpRirpsofZTIc6xmTb0HHmZAS3NLCnz22ODbQkMdJU1Hf5A09KM3+G962NVNnDSoL1WvYI24IviDpYTWNkvQYBDT4tWrRIXn/9ddm0aZO/5O6DH/ygPP/887J48eJR388TTzwh99xzj+TnD8yutXPnTpP9NH36dPPG6Cx6zzzzjCnz08dav369ud0NN9wgL7/8slkPxLK2rj7508bjJtvpuc0V5myQJT5eUpIs/gCUnin44BVzzDoAAAAgUhTnpEhZQZp/OS8zWWYUp5vrUwvS/BPm6G7v4pm5kp8ZXidbNVB26bJSyc1IMr2qFkzLlnlTg9fnp7PbKU+fPDb429uV8uaeOokkI5XpaZ/aaJCdniQXLioyDdi1RPj8c4pG3F4tCXGnZXqFW1D1bCZ81Kkz2hUVFZmgkF4fXDanEhMTpbu721yKi4tHPWveYFpaV1BwKk1Pg1K6rq2tTex2u1itAy+6luGlpqZKS0vLkNu/m/j4OHMBooE2LXzmzeMmDTk12SbTi9IlIT5OslITzToNPGk/p0uWFstly6eIJSHyU1UBAAAQuTxer+w/0WZK0Urz7FKUYz/r/6PHnmuXFJum3Vphl5eZJAnx8f6/aV+h5XPyzPXx9BeaDJrpdM3qaZPyWHuPtw7JHDpY2SYLp2eHdZ/X7l63CZbpzJWZdptkpSWacjQ1JT9VSvLOvp0orQDRzKnkxARZPjc/rHp/+egxm17ejW7LFy8plle215iSzZz0JFk6O09iKvh06aWXmmwn7bO0bt26ERte+aY13bdv37geQxuWD6f3N9J6FX/yi2c0srPtEdWkCxhJW2evvLW7Th5//qB4PF5xebymsZ9mNU0rzpD8RIv88yVLTCme1g+nhvEPDQAAAMKX2+OVzm6XpCRaJNE28ROZL2+rkarGgcSFfcdbZO25JUOyms5Ej+Hys07vi+NjtXCS1UdnURtuhFVhtT29sbtW6lu7zfWmjl4pyEqWK1eUSVx8nORlJI3qGP5ARau8vb/Bv6zBq+sumB6xswoW5djlvetmmhK8SOx3NeHg02OPPWZmuFO/+MUvJBi0d1RjY6N/Wa/ruuzsbJNh5Xa7TdaT/utwOCQzcyDNcjRaWhxkPiGivbm71qTRtnb0So/TY2as0y9Ut9crbZ194uxzy4ySdElKEEmyW8XV55LWPkpTEb6yskZ3JgsAAEwuLd/Ssi3tuaPZ9RcuKpaphWcPFL1bHyJf4ElpPORITceogk8YvTlTMuVoTYcJ9Ch9fdNSQp8BpJUZzw/annSSAN9739Z1qqG8anc4R2zCfbbG9MPvQzOqtNQxUulxXiQGngISfFqxYsWI1wNJ+0UdP35cjhw5IuXl5fLUU0/J2rVrTbmdPubTTz8tN954o/lXl31leKOhMyEMng0BiBTaLLDD0Sd/fP2YOXMRb1KKPf5IuKYYzyrNkAsWFcm0wjRxu0fOFAQAAIiF0i4tvdEMkLL8VLJixmnrwYFp3pXH22+miC8rSB13JYnur+r/q5UyPjZLeJbJRXpfoWv
Pn2YCfZpdpCVe4VD9886BodvT67tq5db8ge1JSykdda4hfb3GaniQSftEJQUgWw/jE9BOw9pr6X/+53/k0KFDZoa64cabGWWz2eRb3/qWfPaznzUz3Wlj89tuu838TRuT33333fKTn/zEZGA99NBDE34eQLjbdqhRdh5pNqWnelZAZ8zQVFWny2O+uLXydN6ULPngVXP99e8AAACxGnj66+ZK0x9IaWPfq1ZNjdjsgVDqc3qGLGurB9331GbI46EzL6+Ymy+b99WbrCcNFmiDcAReut0m8+3ZEs7bk54s1wBxQlycnL+w0FQoNbf3muDZyvmj6+k8mG5LrV19ZmY8/bxrY+9w7QEWCwIafPr85z8vu3btkvPPP1+SkiY+VeSGDRv81zWj6Q9/+MNpt9Fm5z//+c8n/FhAuNM0WW2Yp+nO2w81S2qK1fQ308BTn9Mr9mSLZKUlyfI5uXLpsimSkZoY6iEDAACEXHWjwx948pXeHK/tkDllwZtdLFpNK0qX2pZTpUza+HmiB/Nzp2aZ5tFagqdNpclKiwwaJNp3olU6upySn5Us5SUDrXjGYnpRmr+vk5pSkOY/ca7bgZZ1ToQGNy9fPsUcRxF0irLg0zvvvCM/+tGPglZ+B8Sqt/bWycadtebMkqag1jQ7pLA/xZzBmFuWKa1dTslOSzRf+hP9kgYAAED0Bgu0J6gGC2aVjr5Prs/sKZnmgF4zSTRLacH0wGTS6Kxr4TzzGk731p46OVTVbq4frGozs+ktnJ4zpvvQALDFEi91zd2mB9V4tyedVKmq0WG2Te1BNryhOIGnKAw+FRQUiN1Oo1ggUFo6euX3rx2VAyenn1X6xa5nhTQDSoNPelbgtstmS+EYG/ABAADEguJcu+RmJElTe69Z1iCHZvDEmk176k2QQB2ubpeePo8sKh9bsGC008KHM4cGKhq6zD70tKLTAxUYnWM1ncOWO8YcfFLlxRnmMl7aM+ovbx6X3pMlfCXVdrl0WWlY9LRCEINP//Zv/yb33nuvfOYzn5EpU6aYkqDBiovJyABG25vg+S1VJuOpoWUgTVwbMWoaqk4RWpafJjkZSWbmCp31IZMSOwAAgBFp1sMVK8rkRF2nmaRlSkFqTPZ7OlbbcdryeIJPkUxnV3vmzRPS5xoIVByrTZV1S0tiLlChxxNv76uXXpfHBBLPmTH27cBmixd376kJjXTG7VA4WNnmDzyp6iaH6ROVO44G5Yig4JMeHOuMdHfeeedp6/UDvW/fvkA+HBC1th9qknf2N4jTOdBAXDsw+mK5cRJnMp7WLS2N6GlCAQAAJjMANZ6eNNFEZzlz9ZwKFsRiAO5QZZs/8KR09reWjj5zUjdW6MzYz2+pND22VGtno5kBbqxlmOcvKJSXt9eYfkrJNoucNzdfwkZsxRJjM/h0//33y6pVq+S9732vJCcTaQTGorWzV3r7PJKbmSQNrT3mS1ODtjqLnaYHayZhRqpNrlxZJqsXFFK7DAAAMMGDcO0REyt0//HlbdVmhjoNFqyYF0bBggDo7nWbzKb0FJukJFlGHZWIsaQnU6bmCzz56LHHWINPJXmpcsvF5eY4Rfs1hapR/JyyTDlS3e5vUVKWnyo56bETTIzZ4FNLS4vcddddpuQOwOh0dPfJf/9ut1Q3dUlCQoLMLEmXBdNzJCctSXr6usyPZ3Jigiyfky8XLCqS/Cx6OwEAAIxXe1efvLi1eqB/ZorN9IfRrPJI4fX2y5YDDWYWP3uyVVYtKDDPYzS9r25eWy6OHg0W2KIq8Fbb7JANW6v9s5pdcm6Jeb7DzZ2aKUdrTgUqtDm19lKNBC63xzy3iZYI6rGFNSHeBCF99AT3eLPp9BJK9iSrrD9/mmmCr9u0zsAYa2WUE6FVao0n++HlZSQF9bULaPBp5cqVsm3bNoJPwChnHPnb5kr58xvHTMNH/ZxbE/rNrBEzitNl1pRMUzut69ecUyizpzAdMAAAwERt3F1nAk9KM2Xe2F1
nMssjxc4jzWbWOt/4X9xSJTdeNGNU/6+W2kVjud3mfQ0m8KT038376uWGC2ecOVDR7BCbJUFK8+xhH6jQTKUX36mStq4+UxGhrTcmUiao7//ac0vMdq8liNMK02T+tDPPMlfX0i1NbT2SmZYopXmpEo6SEy0xX1Y73uNR3bY0cKdKcu2ybllp0JrwBzT4tHz5crnnnnvk5ZdflrKyMrFYht79xz/+8UA+HBDRXtpaJc9trpA+l1dbOpm+TnoGIsETb8rvrl41LdRDBAAAiDp6MD9YZ89AICpSNLYNTEbjowGoPqcn5BkooeQLPPkMzuoZMVAxgdnVJtumvfUm8KQ0Y+u1nTUjBtbGQrPCbllbftbbHapqkzd31w0cq4jIstl5snAczckRnnQSBl/gydesvbK+y2QEhn3w6Ve/+pVkZWXJ9u3bzWUwjSgTfEKsa2rvkZe3VptMp8aOHpPmGB8fZ5qK65e6xpi1sfiimXypAwAABENRToocrTk181txzunlWYHU0tEre461mH29+VOzJjwLl2agaObO4DIqmzV6SujGY1Zphmw71DRoeWz9i8KZlkkOLxvdc7zFXJ9RlG6CacGy93irP/Ck9HEJPkVv0NbXCy9YArqlbtiwIZB3B0TVB1t3cv7vhYPmA621+tqcT9N93Qn9JgilaY+pyTa5/fJZMrMken4wAQAAwq3xdpLNYoJC2ph4yazcoGZZaaa774CusqHLlH1lTKDH1Lmzck3DaNPzKclieoKGe+lYsC0qzzV9rJrbeyU7Pcm0sIgWmqXUejLzyeP1SlunU7bsbzDL+0+0yvrV0wKa9abHJQ1tPeLxaNhpcOhJglaOhdDQMkot5fT1QNPrJXnBC8YHJUza1NQkTufp6avFxcXBeDggrDldHrPToT8OTW29phGenqHShoGpyVbzQ+nyeMyO0NWrpsb8zgMAAEAw6T7YZE0LX9/SPSSTQE9I1jY5JhR80vFftJjjquGmF6WbS7RZOifPvOcNrd3S6/RI3KAZ+zS4WdHQGbBMLw08vbqjRo7XdZplS/ypx9JjFB0LokdyokWuXj1VDlW2m+XZUzKCmkkX0Ht+5ZVX5O6775bW1oEGeIM3Yt1Y9+3bF8iHA8Ka1mZvO9hoZrFr63KaHw2lOyC646Ef7CUzc+Xc2XmSn5Uc1A86AESSOx8kkzochcv78uhd60I9BMRYT5QjNe3m5OG5s/LMicOxGGn/brL3+bQflIrlnlCRTLONfNl5x2o7THBoyN8HBYgmqqG1xx94Um5vv8yfliX5mSlmRshImRkQo6dN+IOZ/TlYQL/57rvvPlm0aJHcdtttkpQ0/g78QCRzu72yeX+d/HVTpfnC1uCrdnPS/gL6pa1NIZUuX726TGxWgk4AAADhpqqxS17ZXu0vPNID8+vWTDeBqLGUTM2bmuWfnW52aWbQmvmeqVn1/oqBx55bliUr5xdM2mNHu16nW3Q3fzKDiWUFqVKQlSL1rd1mOT8z2cxWFyh67DJcotUyqdssxJRXao9gLYMLZHAx1AL6SWloaJAf/vCHMmPGxLrvA5HIpKlur5Zn3qowASbt65Rmt5mMJ4/HY2rztRliQnycrF5YKOXF6RKv3cUBAAAQdrSn0uBDcS1xanf0SW7G2BqGr5hXIIvLc01/z8kMVGjwzBd4Unpd+7lonxecnVYrHKhsFbe7X6YVpUlm6qmsH+255Gv6PbMkQ85fWHjG1hl6P30uj2m7MdGeSQnx8fJ3502R2pZuPfiQwpwUsy5QCrKSTXZTa+dAj6lEa4JMLyLwNJma2nrkxa1VpsRSt5nLl08Zsu1FsoB++61atUr27NlD8AkxGZ1+/PmD8sbuOtOcz2Q7xYl0dbskJyNJ0u2JMntKpsyakimzSzPEaiHtGQAAIJxpM+/BNLgw3uBRKEreunsHmggPpidDMbp9+7+9XSFN7b1mWQNN2ptVAzO1zQ5/4Ekdrm43Ab2RsoOqTfZcjbg8XtPn6/L
zppgyp4nQTJiS3OA0hdaT5letLJODle3mNdAeWtqfFpNn4+46E3jyfYbf2lMvV64sk2gQ0ODTv//7v8stt9wir732mkyZMuW06O/HP/7xQD4cEHIut0f2V7TJkep2OVjZdrLEbmBeCN36vSfPl+nsdTqzia/vEwAAAMLbvGlZUtfSLdVNDpOxoiVrEw0cTCbNitF9T9906taEeCnMTgn1sCJCc0efP/B0aubqdlk2J98/M9hgI60baN5dawJPqt0xMEvdxUtKJJzpSfIF07NDPYyoV9nQZSYk0Kym8pJ0f+xkeIB4pG0rUgU0+PTf//3fZqY7DT4lJw9NR9UXk+AToomeDfjr5kpp7uiVxrYe6ex2iSU+Xlxer9lB0fI6PQNyy9rygM1AAQAAgMmh5UyXLZ9iGnYnJMRF3EnE9BSbXLFiiuw9PlB6p42jyWIZncGzvPnXnXz/NYBnsySI0z2QnaL7/MU5pwf1PN5+/218tI8PcKCiVd7aW+9fbunsNeW5akp+qsmm8ynLj54y2YAGn/785z/LAw88IDfeeGMg7xYISy0dfSbwpFKTrJJkSxBXnEhCQrzp97Rsdq584Iq5YhlDU0oAAACEl0ieJU77U120eGw9qiCSnZ4kc6ZkyoHKNrOs2Slzp2aZ65r9pmVQu481m4bj2lA+Y4SePBqs0obzNU2OIQ3DgUNV7UOXK9v9wadVCwrEnmyVtk7tL5ck86MoCy2gwSfNdlq6dGkg7xIIG5oCWdvcLYnWgR+SwWe/kpMsMrUgzTSF06lIl8/Nk/ws0poBAAAQm7TRdnVTl8RJnJTm2wPaGHsyrFpQKDNLM8Tt6TdBgMH7/tr76cJFxWe9j7VLSmTnkSbp7HFJcY7d9IAFLMOyKC2WU5l2+jlZMjNXolFAg0+33Xab/OAHP5Cvf/3rp5XdAZFKs5hqmh1mql398VEzitPND87gqXO10eAVK8vMrBAAAADASJwujzh63ZKabInaSWg08PTsphP+WdPyMpNNCWCkBaDGOrPhcFZLvOkTBQy2fE6ePL+lypRlagN57ScXCwIafNqyZYu8/fbb8txzz0lOTo5YLEPv/sUXXwzkwwFB19zeKxu2Vsmx2g7TJFB/OPVMx9GaDllUnmvSI3V6Vf2B1VntIq0XAAAAACaPNhjWadR131FbNly2bIrZh4w2x+s6/IEnpf1RqxocI84Ih5F19bhMo/N0u830k0X0yM1MlhsunG6OL1OTreYSCwIafFq2bJm5ANFQYvenjcdkz7EWcbq8Zuo67TuoP5xpyVbTx8k3s53WhAMAACA6Mt41q72j22nKpAIdLHljd50JPCmdTn3Tvnq5etVUiTYnd5OH8I60EiN650CD7D7WYq4XZKXIZctLOckdZZITLeYSSwL6bJnNDtHA5fbIk68ckf0nWk0Qyun2SkqiRXQ3wWqJMzNXzChIkww7s4UAAABEk9d31ZqMd3Wwsk1WLygMaJ+ePtfQ2c40ABWNNGinwZPObqdZ1soBbVGBs2vp6PUHnlR9a7fZFudPi57G04hNAQ+17d69W37605/KwYMHTdndzJkz5YMf/KAsWrQo0A8FBJSejdGU1obWHul0DPxQaoaTZj5pwKkgK9k0G7x4SYlML06XONJfAQAAoirr6fjJwJOPtloIZPBpWlGaHKgYmEFNTS+KzjI07YF6zeqpcqKuU3SXeVphuul/hLEHKM26KA1SIvTHvhEbfNq8ebPceeedMnv2bFmzZo14vV7ZunWraUT+2GOPUZKHsNTh6JOXt9eY/k46W92CadmSlGjx/0CmJFvEnmg1jeB01gsaigMAAEQfbfyrJx59ZXHKZg1swET7haan2Ew/JD2pGc2zn+k+c7Q9Pw1Q6nYSTLpdpKXY/Flj+njh0itL2474ennReiQydfe65KVt1dLU3mu+iy5ZWiKZqYmRF3x6+OGH5eabb5Z77713yHpd/u53vyu//OUvA/lwwIS9vK1a/vzGcZPynJS
YILNLM2XnkWaZOyVT+r390trVZz6U118wXQqyU0I9XAAAAATRmoVF8urOGhNk0LYLy2bnBfT+NdOA8qnIo6049IBd+79qc+hLzi0JWvBFZ0C8amWZ6T3r8njN5EbhEOjRTBk9dqps6DLLGhC7eHEx1SAR5s099SbwpLS33Ws7auTaNdMjL/i0d+9e+cY3vnHa+ttvv11uueWWQD4UMCHVjV2m4ePG3bXi8Qw0P+zt80hVo0PKS9JlYXmOLJiRIx6PVzLTEplhAgAAIAboAfUtWeXi6HVJhj0xbErFNNvk7X315oSplu7prMuYPJv31ZvAk28Wule218iNF80I2uNpI+rlc/MlnGgJpS/w5F8u6pKygvDIysLo+DLqfDq6XTJZAhp8ysrKktbW1tPWt7S0iM1Gc2aEhw6HUzZsq5a2zl5xu/tNPyeNLWmAye3xSFqKVexJFkmID4+dDQAAAMTuLFRuj1de2FIp3X3u/9/efcDJWdeJH//u7sxs772mbDrppBJCQui9CMiBjVPU8+wocCp6Fg48z15Q7tRTz/ufCIpKUyAQ6SEhIb237b232ZnZ/+v728xsTdgys/PMzOf9eu0rO5PNzpMnm988z/f3LeZx0+EeiXPYwq6kzcqG36C3dfWaErRIyvoZXI56tudgbXkZCdJyur+x9/FU8evd9YUXXihf//rX5ejRo77njhw5YrKhNm3a5M+XAsZF3xx0ip1qaO02qdTxsXaJtUdLTHSU2dWKiYkyaa2XrCgm8AQAAABL0Ewbb+DJSwfkYOrkD7tBz0uPj6jAkyrKThzS+zbeYZOCrESJFBoE1nvIULdiXo7Mn5Yu2Wnx5t53/eL8KXttv4b0P/3pT8sdd9whV199tSQn96fftba2yvz58+Xuu+/250sBY/bmgRr5+85K0aViTnGqrJibK/pWoQGnWUWpUlHfIdmp8b6G4gAAAIBVaEa+PSba9P/xSkuiqmQqLZ+TbYJNtU2dkpLokBVzrVUSNxUS4uxmgqFOa9S429ySdEtlCAbS6/uq5ZBOqYyKkuVzsmThjEyxuiMVLVJZ32HWDy3T9ZYQ22KizeCDYPDbT0tXV5ekpKTIo48+Ki+99JIcPnxYuru7ZdGiRbJ+/XqJJpMEQXCkvEX+8spJcXv636x3HW2UjOR4WbswT7YfrBOHLUbWLSqQxaXWX0AAAAAQebQBtU6k0n6lpudTXrIsmEHT8qmkE+fOnevf5vOhSKfwWa0XVaAdr2o1ATejr8/cQ2qpWlZqvFjVobJmeW1vte+xNhi/bFWJBJtfIkJPPPGEKavbu3evCTJt2LBBPvShD8mOHTtMxtPzzz/vj5eRP/3pT3LVVVeZj29+85vmuUOHDsktt9wil19+uXzyk5+Uzs5Ov7wWwmex8AaevJMqtGHj7KI0ufWi2fIPF88m8AQAAABLy89MlHdtKJXbL5kj6xblMwwHmCI6fGDEc11Dy2Ct5mRN25DH1Y2d0tPb34ImpINPb7zxhgkwab+n3Nyh6Vtf+MIXTFBKy/HeeuutSWdW3X///fLrX//aBKG2bdsmr776qnz+85+Xz33uc/LMM89IaWmpPPTQQ5P8GyGUHTzVZMagvnmg1vR4Sogb2jBS36eLc5KCeowAAAAAAOvTLKfB/b20fC0rLW5CU+Ze2V0lL+6sMJPXA0n7cQ2mpXZWmNw56SN4+OGH5T3veY/827/9m2RnD01F1GDQAw88INdee+2kg0Jut1s8Ho8p5dPP9cNms0lbW5usWrXKfM3NN98sTz311KReB6Fr/8kmeX1fjZyqaZN9Jxpl81sVpj5bG6qlJjokKc4uG5cVypJZZDoBAAAAAM5Oy+suWl4ohVmJJonh0pXFkhhnH9f3cPa65Zmtp0wfppPVbfL8WxVS0xS4ii3tS6X3v8oWHS3nWyRbctI9n/bt2yf33HPPWb/mtttuk49+9KOTep2kpCT51Kc+JVdccYXEx8fLypUrxW63D8m2ysnJkZq
amnHX7+oHQl9FffuQ/1Q6BSQu1iY3b5olHV29EuuIMT2eAACj+8cHNwf7EAAAACylMDvJfExUY1uPdHa7hkxiL6/tkNz0oVMU/dkc/trzZ5h74DhHjOkbZwWTDj719PRIXNzZ087S0tJM2dxkHDhwQB577DF54YUXzCQ9LbV75ZVXRnzdeEdeZmQkRtyYzHCx52i9vHWw1ny+ZmG+ZGckSkOb0/f7Ogo0OyvJ/PuS6wQAAAAAmGraBiZKg06DntP2MJPRd7r5ufY4dthjZO05uZIzKJilSRnaIN5KJh18mjFjhmksXlJy5u7p2u+psLBwUq/z8ssvy9q1ayUzsz+McOONN8rPf/5zqaur832Nfp6Xlzeu79vY2EHmU4jRpuE65nTz9grfc0++dFQuWVksZVUxpqG41rSuWZwvzc00oAdCTXp6YrAPAQCAiNDZ3Ssx0dGmQgBAYGgJ3LnzcmT7gVoTgCrKTpK5xWmT+p46gW/viUbzeWePS57fXiE3bphpEjCsatLBJ+3n9P3vf1/WrFkzouG40jI4/f13vetdk3qdefPmmQl3H//4xyUhIUE2b94sK1askOeee062bt1q+j5pZpRO2hsPj6fPfMD6NLr78q4qOVbVagJMLrdHstNOj7jsE2nv6pWr1k4zwSmN/mpjNZdrYNIdAAAAgP57IG18XFbbbjIyls7OZgI0EEDnTM+QOUWp4nL3DRmINVGNbd1DHjtd7v5WM+EcfNJm43/961/l6quvNgGmZcuWSUpKijQ3N5uMpz/+8Y8yffp0+eAHPzip1zn//PNNfynNeHI4HLJw4UL58Ic/bHpA3XfffabxeFFRkXz729+e7F8JFnWius0EnpTuztTVdklSvL0/jTEqSjKS48yvWuMKAAAAYHSHy5tN4EnpNvyOw3WmmXJ6cmywDw0IW3ab9l/yz/fKTImTw9Lie6xBJ703trKoPk0nmSSn0ynf+973TOZRS8vACcjKyjIBqX/6p396x75QwVJX1xbsQ8BZlNe2y/5TTRITHWWCTIfKmn2/19LeY+pY05JiZensLJmRnxLUYwXgH9nZyX75Pqzv40fDcYSCX9y7KdiHgAlifbcODTbtOtow5LlLVhRLQRal7whfLR1O2XGoTpwuj8wqTJWZBaF7/9jX1ydvHaqXE9X9PZ9WL8iVHG9VkEXXdr/E3TQT6e6775bPfvazUlZWZgJQGRkZUlxcTDNvTIgGlvQ/087DdZKa5JCYmGjzH0xDpd4eXdpQ7dp1MybdrA0AAACIJNpzZvexRnN9reK1qqClSyrrO0wAiiAUwo2z1y1/23rK9EdSVQ0d4rBHm/8LoSgqKkrOnZttPkKFX+/abTabaUAOTJT2capq6JQXd1RIfUuX1Ld2S0unU6blJpugk2Y4tXY4Tff+hTMzCDwBAAAA46R9Uy9ZUSSHy1tMhUFbp1N2Hq43v7fvRKNsWFoo0/L8k6kGWEFzh9MXePLSYGuoBp+mggan2zp7xRbjn9Y23LnDMrSJ+HPbyqS8rl0aW3skJan/B7zX5TFNxLWGtSQnSTJSrFnCCQAAIqc8lPI/hLr8zETz0etyy/8+d9j3vOZCHaloIfiEsJJwuk/w4K5DVu+RFExuj8dMl69s6DCPF8/MlGVzJpdlFe2nYwMm7Y19NSYarVlNnr4+ae/sNSV3KiYmyoynJPAEAAAA+I9WF+j192B2G7eJCC8aaDrvnDyT6aem5yXLvGnpwT4syzpU1uILPKldxxqksXXohL3xIvMJltHT6za/piQ6THqfPs5NT5C1C/Jk3aJ8X68nAAAAAP4REx0tq+bnyBv7a01WiGaILJ2VFezDAvxuVlGqzCxMEY+nT2wxBFjPptvpGuW5/vv1iSL4BMvQ6PPOIz0myFSUkyS5GQmyej7ZTgAAAEAgzS1JN03GtQohPSnWTM8CwpFm+UXHkNTwTkpykmXP8UYTqPNmjmWnTe6+nOATLGPJrCyJ00kbzd2SluSQBTM
yRqQAAwAAAPC/5ASH+QCAzNQ4uWxlsRlKoGW4C2dkiN02uaA0wSdYbtdlbkmwjwIAAAAAgMiVk55gPvyF4BMCShuHn6hqM1M0inOS/DKiEQAAAACAYOvo7pXy2nZTqqoTIqncOTOCTwiY2qZO+b/NR6S5rcc0Ec/PTJAr10wjnRcAAAAALJIs8NbBOqms75DEeLusXpBr+vvgnbV2OOWp10/6BmcVVyXJhcsKJYoA1Kho8Y6ALGAdXU554tWTUtPYaf4z1jV3mV5OWjMKAAAAAPAPl9sz4T+760iD7D3RKE3tPVJe1y7Pby/367GFs0Nlzb7AkyqrbZemtp6gHpOVkfkEvy56f3+7UvYebxQN9ja3OUW0OX7UwLhGYsAAAACwkq4el5ysbjMTl2fkp5jmukAo0EDH5rfKpb2r10wp3HRu0bizlrRaZbDm9h7TMmWyzaUjFVlPZ8bKCr/QEYxPvHrCRMqrGzuluqHT1L/GDBpjmZLgkLklaUE9TgAAAGBw4OmJ107IG/tr5LW91fLMGycnlUUCTKWXd1WawJPSzCX9GR6v1KTYIY8TYm0EnsZo3rR0iXcM5PNo8Do9eej5xAAyn+AXWlZX1dDhe6wJTxp1T06wm+ZrhdmJctHyIhqOAwAAwDKOV7VKZ7fL97ixrUeqGjrNoBzA6tpOB5682juHPh6L5XOyTdKA9nzS+7f1iwv8eIThTc/XNeumS2VDh8Ta+u95cWYEnzAhvS6PHK1oEbenT6bnJ0tUdJTE2mMkJjrKPKcS4mxy08ZZUpDFf0IAAABYz2iTqaiaQagoyEyUkzVtA48ncN+lZaablhf5+cgiR3ysTUoLUgP2/Tu6e830eL3PLi1MDemyYIJPGDdNRf7r1lPS0NptHu853iBXrZ0mMwpSxeXpk/qWbnHYouWK1dMIPAEAAMCySgtT5GBZs+lzo/IzE7l+RchYtyhf4mJjTK/d7LQ4WTo7K9iHBD8Hnp549YR0O/ubmh+uaJErVpeILSY0A1AEnzBuNU1dvsCT0v8Mx6vaZOPSApldlCpud5/kZSaYTCgAAADAqrS3zZVrppmSI204rmUzo2VDAVakWTBrFuQF+zAQIMcrW32BJ9XY2m2myRdmh2ZZMMEnjJum/A2nb9ba2b8oRP8jAAAAIHJv4KflJQf7MABgCG1tM9p9d6gKzXwtBFVueryUDGrCmJYUK7MLA1fnCgAAAABAJJldmGrutb10EEJeRoKEKjKfMG6a4bRxWaGZBKLNxfU/QCg3PgMAAAAAwEoc9v6y4OrGTlN9lJ+ZYO7FQxXBJ0yI/tDTjBEAAAAAgMCw26JNxlM4IF0FAAAAAAAAAUPwCdLZ3WvGOAIAAACYmK4el7R39UpfX1+wDwUALIeyuwj32t5qOVTWbD6fVZgq5y3MC+k6UgAAAGCq7TraIDsP14mGnbQ1xablhRITzT4/AHixIkaw8rp2X+BJHalokbLa9qAeEwAAABBKmtt7ZMfpwJOqrO+QA6cGrrEBAGQ+RbRup3vUdGEAAACc3T8+uFms4Bf3bgr2IUS80a6pu7mmBoAhyHyKYDqq0WGL8T22x0QzwQ4AAAAYh8yUWEmKt/seawuL4tzkoB4TAFgNmU8RLDHOLpevLpF9JxrN4/nT0iU5wRHswwIAAABCht0WY66pdx9tkF63x/RRzUmLD/ZhAYClEHwKYzpp452ah6cnx8q6RflTdkwAAABAOG7qrjknL9iHAUTMfSxCD8GnMNTjdMuWnRVS3dRlUoA3Li2QjJS4YB8WAAAAAACj0mFY2w7UiqevT+ZPy5Bz52YH+5DgR/R8CkNvHqiVqsZOEzFu63TKCzsqgn1IAAAAAACMqrG1W17fW21KV92ePtlzvEFOVrcF+7AQqcGnzZs3y4033iiXX365fOMb3zDPbd26Va677jq57LLL5Ctf+Yq4XJE3WUIjww0t3dLU1mMet3Y6h/x+e1ev+RoAAAAAiDSd3b1S29wlzt6Rkwl
hDW2dvTL8jnX4fS1CW8iU3ZWVlZng0u9//3vJzMyU97///fLcc8/J/fffL7/4xS9k+vTpcu+998qjjz4qt956q0QKt8cjz28rN5lOShsc5qbHS11zl+9rtOFhNDWzAAAAACLMiepWeWlXlXg8fRLvsMklK4tN31tYS2ZqnNhiosXl9pjHeveak07j/nASMplPzz77rFx55ZWSl5cndrtdvvvd70pKSooUFRXJjBkzTEOym266SZ566imJJEcrWn2BJ3WkokUKMhPlnOkZkpkSJzPyU2TjssKgHiMAAAAABMNre2pM4El1OV2y/WBtsA8Jo9BexRefWyS56QmSnRYv65cUmM8RPkIm8+nkyZPicDjkgx/8oNTW1sqmTZtk9uzZkpub6/uanJwcqampGdf3jY6OMh+hSuthh2c19UWJrFnItA0AAAAAkUtbj2gPocF6XUMfwzpyMxLk8tUlwT4MRHrwye12y0svvSS//e1vJTExUT72sY9JfPzINLzxjmTMyEgM6TGOi+fa5EB5i69+OSXRIXNnZkmcI2T+aQEAAADA73STfmZ+ihytbPE9V1qYGtRjAiJVyEQosrKyZO3atabfk7rooovkr3/965DAUV1dnSnLG4/Gxo6QznxSlywvNGMpY2KizEjKro4e8wEAoSg9PTHYhwAAAMLEeYvyTD+h1g6n5GUkyLS85GAfEhCRQib4dOGFF8rdd98tLS0tkpSUJC+//LLpAfXQQw/J0aNHpbS0VB577DHZuHHjuL6v1v96a4BDuT52+Zxs32MXqaQAAAAAYLKf5k9LD/ZhABEvZIJPS5YskTvvvFNuv/126e3tNVlQN998s8ycOVPuuusu6e7ulsWLF8ttt90W7EMFAAAAAADAaVF9fX2hnfYzSXV1bcE+BADAINnZ/kmHZ30fv398cHOwDwHAOP3i3k0SKljfASD8jHVtjw74kQAAAAAAACBihUzZHQAA4YhsIwAAAIQ7Mp8AAAAAAAAQMASfAAAAAAAAEDAEnwAAAAAAABAwBJ8AAAAAAAAQMASfAAAAAAAAEDAEnwAAAAAAABAwBJ8AAAAAAAAQMLbAfWsAAAAAgfSPD24WK/jFvZuCfQgAAAsj8wkAAAAAAAABQ/AJAAAAAAAAAUPwCQAAAAAAAAFD8AkAAAAAAAABQ/AJAAAAAAAAAUPwCQAAAAAAAAFjC9y3Dn29Lrc0tfVIQpxdkuLtwT4cAEAYjicHAABn1tLeI06XR9KTY8UWQ+4EEKoIPp1Ba4dT/rr1lHT2uCQqKkrWnpMrs4vSgn1YAAAAABARth+skz3HG8znGny6bFWJxNpjgn1YACaA0PEZ7DhcZwJPqq+vT17fVyMeT1+wDwsAAAAAIiIZwBt4UlqRsv9EU1CPCcDEEXw6g16XZ8hjDTy5PUOfAwAAAAD4n9PlHtNzAEIDwaczKC1MlahBj6flJovdRoonAAAAAASaltllpsT5HkdHR8nM/JSgHhOAiaPn0xnMyE8Rhy1aKhs6JTHOJvNK0oN9SAAAAAAQEWKio+WyVcWy/2STOHs9Mj0/WbJS44N9WAAmiODTWRRmJ5kPAAAAAMDU0sqTxaVZwT4MAH5A2R0AAAAAAAACJqpPR7kBAAAAAAAAAUDmEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAiL/jU3t4u11xzjZSXl5vHW7duleuuu04uu+wy+cpXviIul8s8X1NTI+9973vliiuukPe9733S0NAQ5CMHAAAAAACAV1RfX1+fWMyOHTvky1/+shw/flyeeeYZycnJMUGnX/ziFzJ9+nS
59957ZdmyZXLrrbfKxz72MbnkkkvkhhtukEcffVReffVV+c53vjPm16qra5NQFx0dJRkZidLY2CEej+X+OacU52IA52IA5yK0zkd2drJfvg/re3jhXAzgXAzgXITWuZiK9T0UzsNU45yMxDkZiXMyFOdj7OdkrGu7JTOfHnnkEZPdpEEntWvXLikqKpIZM2ZIVFSU3HTTTfLUU09Jb2+vvPHGG3L11Vebr7v++uvlxRdfNM9H2g+Bnhf9NdJxLgZwLgZwLobifIQO/q0GcC4GcC4GcC4GcC76cR5G4pyMxDkZiXMyFOfD/+fEJhb0wAMPDHmspXW5ubm+xxqU0ueam5slMTFR7Ha7ed5ms0lSUpI0NjYO+fqz0RMX6j9QMTHRQ34NdT1Otzjs0eYHO9LPxWRwLgZwLobifCAYPH190uvySKw9JtiHAgAAgClmyeDTcB6PZ8RzGpgY7XkVHT32GypNG5tIkMOKUlLiJZS1tPfIEy8fl6a2bkmKt8uV62ZITnpCRJ4Lf+JcDOBcDMX5wFSpqGuXLW9XmuBTdlq8XHRuEUEoAACACBISwae8vDypq6vzPdbP9bmMjAzTmFybj2vWk/7a0dEhaWlpY/7eWq8YDplPehPZ2tolbvfoAblQ8Letp6SmvsN83tjjkr9sOSI3biiNyHPhD+F8Llxuj8ScTvuM9HMxEVY/H+npicE+BPiR9gTYsrNSek//rNU1d8mOw3WyZkFesA8NFl3fbWRlAgAQdkIi+LRkyRI5ceKEHD16VEpLS+Wxxx6TjRs3mnK7VatWyV/+8hfTcFx/1cfeMryxXhSHSwMxvYl0uax3IzlWbZ29pixj8OOJ/n1C/Vz4UzidC82aeHFnhVTWd5isifVLCqQwKzEiz4U/cD4wFXp63b7Ak1dHV//EWsCrub1HNr9VIW2dTklLijXZcZoFDQAAwkNIbC05HA751re+JXfddZdcfvnl4na75bbbbjO/p43J//SnP8lVV10lv//97+W+++4L9uFigoYHEcYTVAh1Fhw6aUlvH6k3gSfvDe2WHRVmlxyAdcXH2iQzJW7Ic6zvGO7lXVUm8OQNRL26pzrYhwQAACIl82nz5s2+zzWj6fHHHx/xNfn5+fLf//3fU3xkCITlc7PFZos2JRm667l0VpaEu64elylHqW3qlOQEh2xcVijpybHBPizLau8aOslSsyk0CEWJBmBtF68okrcO1UlHt0uKspNk3rR0CXf1zV2mz1VHV6/kZybKhqUF4qDP1ZjX9/au/kAUAAAID5YOPiGyREdFRUTAabA3D9RKTVOn+by10ykv7qiQGy6YGezDsqz8zAQ5WdPme5ya6DBZFQCsLc5hk/MW5kskZTtt3lFhNhhUZUOHbD9UJ2vPoc/V2db3E9UD67sG7AAAQPjgrg0Rz+3xmPT+U9XtEhcbI+cvypfcjIlN2Ruvlo6hO7ttXb3mpiVcJjD629ySdHF5+qSspl0S4myyYm62CVoCwGg6untNdmlja7cp/dPso4S4wPcR0nJgb+DJq23Yeo+hNDipQcqmth7JSo2TZXMiazMKAIBwR/AJEW/PsUY5VtlqPm/v8piGpzdtLBW7LfClXHkZCeamyCs3PZ7A0zs4Z3qG+QCAsfQR0lJuVdvcJa/srpZLVhYH/HXtthjJSI6VxrYe33NTtakRqvQ9d/WC3GAfBsLENXf9KdiHAABh4xf3bvLL9yH4hIg3PPvI6XJLl9Mldpsj4K+9/PTOrvZ8SklwyMr5OQF/TQTf4fJm2Xm4XrQN8ZLSTJPRBcD/WtqHru/NHQPBoEDTaW1bD9Sank8FmYmyqDRzyl4bwaETe7fuqzHlg3GOGDlvYZ7kpBN0BABAEXxCxMtOi5fjVf2ZTyoxzi6JcVPzXyMmOlpWziPgFEm0CfFre6pN4Em9vq9G0pJjJZcbFMDvstPj5dSgPnE5afFT9tpa3rdxaeGUvR6Cb9+JRjlY1mw+12EYz2+fukxqAACsjuA
Twr6f09Z9tVJe1y6J8XbT7HX4NLl5JWnS43SbG5S4WJusnp9jgkJAIDS3O32BJ99zbT0En4Bx6uzulZd3V0tLe4/ZRNAsk+HT5PQ5W3SUKX/LSImVNZR1IYC0X9XwTGr9OU1NYootAAAEnxDWdh1pkEPl/buQnT0ueX57ubxrw8whfZX086Wzs8wHEGh6A6w/c9pYXulPojZCBjA+W96ulNqm/n5OOgUzJjpK1i8pGPI1sfaYEc8BgZKVGu/rIaniHTaz8QUAAAg+Icw1DGrm7Z181O10S3wsP/qBdPBUk+w/2WRuBpfPyZbC7KRgH5JlZKTEyQVL8uXtIw0mALW4NEuyprAUCAgXjS09Z13vEZhs4jcP1EplfackxdlkzcI8068QA5nUOuVQez7FO2JMA3VbDJnUAAAo7sAR1jSjpKK+Y0g/J20CisCprO8wfYy8XthRIdedP0OSuUHxmZ6XYj4ATC6LUCfYeZFBGHgaND94qj+buK3TKc9vK5cbLpgZ7MOyDM1q1Q0X/QAAAEMRfELIOlHdKnuONYpW0C2ZlSVFo2TXLJ6VaTKdtOdTQpxNzluYP6TkDv5X3zJwM6jcnj7TB4PgE4Cx0IxAnQZZVtsu8XHahy9XUhJHrh8XLC2QVwb1fFpFP6eAq28Zml3W2ukUZ697RK8tAACA4Qg+ISTVNXfJ33dW+ho3a3bNtedNl8xh5UvaOHztwrygHGOkShvWWFWDfTRbBTBWe080yq5jDebzpvYeeW57mVy/fmR2jWayXrqyOAhHGLl0YEdVw9BsYgJPAABgLAg+IWSDT4Mnhnk8fVLf2j0i+IQBe443yMnqNolz2GTlvJxRMwn8oSQ3WZbNzpb9JxslOjpKVszNkdQAvRaA8FN3uom4V1tnr+mjQ5BjdL0uj2w7WCuNrd2mp5yuuXZbYPoMLZudZf4tKuo6JCneJucvppk7AAAYG4JPCEnDM2m0kI4Ax5kdKmuW7QfrfI+b23vk+vUzTGZYICwuzTQfADCh9b22fcjEOnr1ndlre6vleFWrryxOg1EXBGjCnzbPDtT3BgAA4Y3gEyZEd1h3HK43/XzmFqfJtLzkKX39wqxE09Bz97EGiY6KMruxOuIYo6tu7BzyuL2rVzq6XAHLfgIQunRU/JHyZrHZos06O7yUNtA0cK3NrLXnk/bqO39RfsAC5eGgZtj6PvwxAACAFRB8wrhpyv1ft5aJ0+U2j6sbOuSy1SWSm54wpcexaGam+cA7Gz4K2x4TLfGxZBIAGKqirl1e2lXpe1zf3C3XrZ9hso+mimbXbFhaOGWvF+p0mENnj2vIYwAAAKthKxHj1tDS7Qs8Ke29VNXATqu399SOQ3Xy7LYy2X6wVlxuj1jBotIMmZabbJp/a/nKhqUFYrcRfAIw1PC1vMvpkua2nqAdj9U2Xl7ZXSXPbSszpcxWsW5RnmQkx5ryc/1VHwMAAFgNmU8Yt8R4u7nIHdzwOzneHsQjso6t+2vk4Ombksr6Dunqccv5i/ODfVimZGXjskIzwlwDUAAwmqRha7muF1r6Fuk8fX3y7JtlZvqeqqjvMCXfs4pSg31oJtPpmnUzWN8BAIClkfmECY1aXjU/10wy08vcOcVpMrMgJdiHZQl6QzL08UDTXCvQGxNtTPvCjgrTpFZ38gHAa05JmszMTzFre0x0lJx3Th5lXCLS2e3yBZ6sur7rhtCuo/XywlvlsvNIvcnEBQAAsAq2MzEh86alm5sU3WmlEeyAxDi7aeY9+LGVnKxuk7+/PdDPpbapS645b7oJJAKAZvOsX1Ig5y3KM5+TSdNPe17ZoqPF5fFYdn1/c3+tHDjVZD4/VdsuXd0uWbuQEjwAAGANRA0wYXpjEumBp87uXnl9X7Vp0Ktldnqh7y1b0VKVdYuCX3I32KnatiGPm9t7pG1QsAwAlK7tkR54qm3ukpd3Vclre6ql2+mS9UvyTTN0pQM2lsyy1sALnQ54tscAAADBROYTMEG9Lo8
8s7XMjARXx6va5LKVxXLjBTPF2esRh916N28JsfYRAcS4KZxiBQChoLG1W/669ZSvdK2srl2uWzdD/uHi2Wbtn8rpf2OlGx4d3QObCfH06gIAABYS2WkrCDvHKltl87ZTsvNwfcAnzWn/D2/gSWkJopY6aMAp1hFjucCTWlyaITnp8eZzLbU7b2GeOVYAsDINAu062mDW9yPlLQF/vfK6jiE9k7Q/nmZCacDeioEntfacPF9z+IRYm6yj5A4AAFgI22IIG3pDok20HbE2cfa4pKaxUy46tyhgr6cZQ8On/sVbPJBjt8XI5atKzBQ+uy3afACA1b2yp0pOVLX51veO7mw5Z3pGwF4vbpS13Orruw4DedcFpdLldEm8w0YvPwAAYCnceSJsHKtqHfK4vK7dlEcESkqiQ5bPyTYBKJWXkWAasVudd3Q6gSeMhWYQasbJG/tqTF8zYKppVumJ6qH96k4MW+/9bVZRqhTnJPkeL5yRKVlp/VmjVqYBJ22ETuAJY6EDUrYdqDUfrR0DmdwAAAQCmU8IG8N3qnUyUUxMYC/AF87MlNLCVHODro3GrVhqB0zmpn/z9nKpauw0jw+eapILlxcNuSkHAs2UMttipKfX7XvOEeDSNy2vu3BZoXR0uyQmOkriY7lcQnjRUtKnXj9pflVHKlrkmnXTLTfFEQAQPkh9QNjQLKSkBPtAP6PTo8IDTW9KkhMcBJ7GQbPSdKdVgxka4IA16U2JN/Ck9F/qWGXg++0Aw+l6bjudramZmyvn5QT8NXVN100FAk9jp+u5ruu6vus6D+uqbOjwBZ6UBnfLmZAIAAggrqgQNvQm4YYLZkq03SY9XU6xnx6JDWs5WtEiL++u8j2ub+mWdYvyg3pMGJ2Oldcb8MEBwkBnnACjKcpOklsunCX2OLu4nS5fuTOs5dU91SaDRu090SjnL8o32cGwHodt5FrO+g4ACCTuzhFQ2nNp34lG2XmkXlrae6bkZjkzNZ6dags7PGxS1dHKVvGQ/WRJeiOiGSbeG33N8FtSmhnko4JV6LTPXUfrZc/xhiElcYGikzl1fadfnTXpOq7r+dnWe1hHUXaiTM9LHvQ4SaYNegwAgL9xh46A0THVz24rk7rmLvN47/FGuXLNNDORB5Fr+I2j9lMhi8G65k9LNz2euntckpYcawK8gDYqfvK1k76g09GKVrO+ExiKXFGn13OXe2AzwRbgvouYOM1q3bC0UBa39ZjsVr02o30AACCQuEpEwDS0dvsCT0qbctMvBstmZ/maw+uF7poFuWFzwasX8FpWuPtYgzS2dks4lbTqpC8CT/A6XtU6JNupub1HapsG+oMh8gxfz3Wd116M4UJ/3vefaDTlhIN7JYU6DTplpMSFzfswAMC6yHxCwOgO6IjnuHmNeHqRe/35M6WpvUcS4/qbtYeLl3dVybHTI+C11PSSFcWSl5EQ7MMC/I71HaPR/k65GQkmMy49KdaUSoaDXpdbnn79pLR0OM1jbSdw9drplPgDADAOXCkioEGGOUVpvsepiQ6ZX5IelGM5VdNmLhbDKRsllOkNiQZlwinwpDvh3sCTt+xUpz4B4Wh2UZpZ4720d0xuevyUH4dm1B4ub5YDJ5ukszt8slFCPVNS1/dwCTypivoOX+BJ6c+aXlcAAICxC6ktmz/96U/y8MMPm88vuOACueeee+TQoUPypS99SVpbW2XOnDny4IMPSkICmQZWsXZhnswqSjWNx7PTgtMoVkc+a5q8io6Kkk3LC6UwO2nKjwPhLfp076rBrdP15w0IR7qWX7G6xJRWaxaUru9TXbbj9njk2TfLpPZ0efeuYw1y9dppkhBnn9LjQPgbbS3XNR8AAIRh5lNXV5fcf//98utf/9oEobZt2yavvvqqfP7zn5fPfe5z8swzz0hpaak89NBDwT5UDKM3JQVZiUEJPGn2yb6TTUOm8ew7QTZKqKpq6DDZRE1tgZ+cOF6x9hhZXJo15PEiJsMhjGkPsPz
MRMlJTwhKv5japi5f4MmbfXikYui0NYQGfa/WPmKaxWbFfko6CS5/UAl1VmqczMhPCeoxAQAQakIm88ntdovH45Hu7m5JSkoyj202m7S1tcmqVavM19x8883y3ve+V+66665gHy6sIkpGZKOQjBKa3j5Sb/ooeXecL1peZIKaVrJ0dpYZX93Z4zJBV/qBAIEzWsCLZJTQo5tCz20vN5sLKiHOJletsVYGm77nXLyyWKrqO8z1hJYVMoABAIDxCZk7Iw04fepTn5IrrrhC4uPjZeXKlWK32yU3N9f3NTk5OVJTUzPuC4pQT532Nnml2evo5+Lcedmy/UCd+dxmi5Zlc7LNr+EuXH4u2jqdUt3QKVv310ic4/SS1Sey72SjlOQlW+5c5FksIBbOPxuIbNpjqjAr0fTjUdpDTntRIXTUt3TJ8cpWOVHV6usRpf2UDpe3yJJZA5mkVim9o2QfAIAICD4dOHBAHnvsMXnhhRckOTnZlNq98sorI75uvKn/GRmJYTNeNiVl6pu9hsK52LAiUebOyJLWDqeZwpOaFCuRJJR/LnR0+zNvlkuP0yXl9R2SmRpv/g1VfHyspKcnRsy5CATOB0KZvndvOrdIKuo6TP+ngsxEcdjDp8l1uDtU1iyv7602maLlte2Sn5VompWrMLksAwAAoRh8evnll2Xt2rWSmdnfQ+XGG2+Un//851JX15/RovTzvLy8cX3fxsaOsMh80pvI1tYucbs9EsnOdC5io0Wykx3i6XVJU5P1+kkEQjj8XLz8VoW0t/f3d0pPjJXahg5JSbCLPSZaZhUkS1NTf8aDP86Fs9ctlQ0dYrfFSEFmcHrYTBWr/2yMN6iIyKXZKMU5ZKOEoh2H60wJW5wjRuLjbNLQ0m2CT1p2N6swze8ZtA2t3ZKS4BgypREAAEydkAk+zZs3T775zW/Kxz/+cTPNbvPmzbJixQp57rnnZOvWrabvk2ZGbdiwYdxNLvUjHOhNpMtlvRvJYOBchMe50GPXfiAqPSXWlGWsmpdjej2lJcWO++91pnOhDW6fev2ktHf1msfaSPaCJQUS7kL5ZwNAaDu9tJtAv5ZP6tTEtQvzTd88X4m1H9Q0dspz28rF5fGYHpCrF+TK3JJ0v31/AAAQZsGn888/X/bt22cynhwOhyxcuFA+/OEPmx5Q9913n2k8XlRUJN/+9reDfajwI5fbIzVN/aO8tb9HOGejYKQF09OlvK5d3KcDxMvmZMmC6Rl+f52DZc2+wJPSqUv62lmplKUBgaJBX8120UwXslEij04D3Xag1pfBdv7igoBMkNtxuN4EnpS+k2w/WEfwCQCAUA8+ve9975Mf/ehHkpIy9OKhoaFBPvjBD8rjjz8+qe+vwSb9GGzOnDnyu9/9blLfF9bU6/LIX7eeMqnyqiQ3WTYuLSAAFUF0hPvV5003Dcf1BlV/BgKhb5TsxzBJiAQsqamtx6zvPb1uk41y7rwcOScAgWVYl/57ZyTHSnO7U7JT4yQrLTDB/j5vitVpmk2rz3EtAQBAiAWftmzZIrt37zafv/nmm/LTn/7UlMUNdvLkSamoqJjsSyHCHKlo8QWe1KmaNqlu7JT8TPrBnIleUNe1dJtftXlrONDyOv0IpNLCVJP9pDfCKjc9QbJSycQAAuXtI/W+/2/ebJR5JWkSE80ExrNtyOh7osMWLTmnBy+EOn0/D/R7+vzp6VK3s8v8nCnNniXwBABACAafCgsL5Wtf+5q52dWPp556SqIHXTzqG7wGo+6+++7JvhQijE4vGvGcm3SUM9H/fy/urDRBOqV9kd596bxgH1ZISEl0mAyrkzVtppl5aWGKKQMBEBjeUtrB65cu+THEns5Yovj0G6dM42y1ZFaWXLJ2RrAPKyRMz0uRhDV2Mz1VG44HKoMWAAAEOPg0a9Ysef75583nF154ofzhD3+Q9HRq6TF52vth7/FG6Xb2745r9ktumOz2BkJFfYcv8KS0VO3QqWbJS4v1282P3vg
kJzgkPjZk2sWNmU5ZouwHmBpzi9PMmuUtiZpVmCp2G5GnM9H3Qm/gSe0+2iCrFvlvKIJ+7y6nW9KTYsPy3yEnLd58AACA4PHrHWRGRobU1NQQfIJfJMbZ5aq10+VYZavExETJbG5O3rEkY8Rz7v7AnT+mBT2/vVx63R6xxUTLpuWFlD8CmLCinCS5fHWJVNV3SGK8XUoL/N9oOtyGb4z2nD/eEfeeaJTtB2pNWZpuLly+qlgS4ux++M4AAAAD/HonX15ePqLfEzDZbJTFpZkmI8Vhj5FIpVlHja3do96AeGmZnZ4vr7jYGJlZkOqX139jf40JPCk9hjf21fjl+wKIXJqJouVjmvUUyT14Wjuc0tzeM6Ix9vC+dNHRA+dIs4DTkyffl67H6fYFnrwZULuONkz6+wIAAAQ08+lDH/qQfPGLXzST7UpKSiQubuiFUUGB/1LEgUhqvP7anmozoSc5wS6XrSoxWWHDxdpj5Mo10+RQWbPoPczcaWmSlOCQpp5ev2dVjZZlBQAYH13bD5U3m8+LspPkwuWFo/aby06LlytXTzN96WIdMXLOjIwhwaiJ0k2F4SEv1ncAAGD54NP3vvc9cbvdZurd4F1M70jb/fv3+/PlgIhouv7a3v7Ak2rr7JUdh+rl/MX5o3699mLSTAJl82OJou6663SqwY+B8XL2uk1fmcQ4mynfBCKZTm/1Bp5UeV27HKtolVlFo6+vmalx5kP56/+P/l/Mz0iQqsZO81iv3GZSAokJ6Ox2icvjkeR4e0RnMgIApij49Mtf/tKf3w6IeC6XToAaui/tHU8+lZbOyjIXlDrmOyM57ow3R8CZ6I31lp2VpmxTy0MvWVlsJk8BkUpL3kY855ra9V2DBJvOLZIDJ5uks8dlJsHlMdgD47TzSL1vg0r7QV50bqHEDJp8DQCA34NPq1at4qwCfqTlFdrLqbK+w/fcRHel9aZfJwcmxNomVK6h2U5kPGGiXt5V5etZ1t7Va/rMXLi8KNiHBQSNBnl0Pdagj7LHREtxdtKEvpduSrjdHpP9Ot6sE82iWjgzc0KvCzS19QzJjK5q6JCDp5plAdNjAQCBDD719PTI7373Ozl06JApv/NyOp2yZ88e+etf/+rPlwMiwoXLCmXfiUbp6HJJYXai2ZmeyLS6zW9ViNPl7s86WVEsKYlknWBqaNmoltwNpoFQINI3F65YM032n2wyGa5zitMmtC7r+8O2003DCzITTd8oyloxVbqd/cHTd8rqAwDAr8Gnb3zjG/L444/LggULZPfu3bJs2TI5efKkNDQ0yAc+8AF/vhQQMfQmYnFpfx+nsd7o64VfUsxAU/KXd1eZwJM362Tr/hq5eEVxQI4XGE4bKGvQVJsle5XkjT+ICoQb3QxYOS9nzF+v2YNuT5+vp59Op/MGnlRlQ4ccONUkC2eQyYSpkZUaZ36O9drCu94X50wsgw8AEN78Gnx6/vnn5YEHHpCrr75aLrnkEvn6178uxcXF8pnPfEZ6eyc/cQvAO2c4vbCjwpRgZKTEyS2XzjPPe8s6vLqGPQYCbf2SfEk7FmtulvMzEukbBkwkw+lgnRniMrMgVa6/cLZZy4dPq+vqIesEU8dui5ErVpfI7mONJjg6qzBVstLig31YAAAL8mtedmtrqyxfvtx8PmvWLNm3b5/Y7Xb5yEc+Ii+88II/XwpjoOOS9SIVkWPL25W+huTNbT3yytsV5vOSYbuQxRMo3QMmQ5vPauP69YsLCDxNkq7rur4jcrS09/RnOJ1+Tz9R1Sr7jjdIenJ/1omXdnsqyk4M4pEiEiXE2WX1glxZtyhfcmlYDwCYisynjIwMU2JXUFAg06dPN72fVHp6utTXDzQjRGDpTujz28vNZLJ4h002Li+UHHahwp72DOkeltHU1tmfcagXhDpZrKXDKTnp8TJ/WnqQjhLAZNQ2dcqLOyqly+ky5S6blheZJtMIb5q9OnwrSdf3wox4uXx1iew60mBKq3UohE4
bAwAACOvMpwsuuEC++tWvyuHDh+Xcc8+VJ554wvR++u1vfyt5eXn+fCmcxZsHak3gSekNypad/dkvCHNR/SOOB5uen+LrG7VsTrZsXFZoJtCMdxoSAGt4cWd/4EnVt3TLtoO1wT4kBJhmO6UlxZrNJJ8okWmn+6Ylxtll7cI82bC0UIomOC0PAAAg0Py6XXr33XfLvffeK1u3bpXbbrvNTL67+eabxWazyTe/+U1/vhTOwtv00aur22WaUGsTSISf+uYuc0Pa2d0rmSlxMrswVbqcbjMZ79x5OdLc3BnsQwTgB26PZ0S/Np2CifB1qKzZbChpk3Etp7NFR4nL0yfzpqVLQXaSNDV1BPsQAQAApj74lJKSIj/5yU98jx9++GHZv3+/ZGVlkWkxhXTUcl1zl+9xXkYCgacwpg3GvQ3F61u7JS05Vi46t8hMQ+L/HRBefbNy0xOkpmkgoJyfSX+VcKVl0q/vrfaV25XVtsv5i/JNaZ132h0AAEBEBp/mz58vr7zyiun9pPTGd8GCBVJeXi7XXHON7Nixw58vhzNYPCtToqNFqhs6JTnBIcvnZgf7kBAgOlmm8wx9nvyRQaclm42tPZKREmtK9rS8A0DwXLisULYfqpX2zl7Jy0yURTP7328RfnQy5Gh9nvyhqqFDXt5dJd1Ot0zPTZbzFuWZ4CYAAIBlg0+PPvqo/PnPf/b1Jfjnf/5nM+FusNraWpMVhamhWU6LS7NkcWmwjwSBpr2cstPiR2S6+cMru6tMTxmlv76yu1ouXVnsl+8NYGJiHTFy3sL8YB8GpoCWUTtsMaaRuNI81tyMyQ8P6XW55YW3KqTX3T8x8VhVq6QmOcx1AwAAgGWDTxdffLFs377d91gbi8fFxQ35mjlz5sj1118/2ZcCcIZMCB3B3dbVa0pwNPNtPDRo/Mb+Gjle2SYOe7SctzDPNC7Xko/BWjp6/HzkAIAz0SmGl6wskh2H68Xt7pO5JWnjnmSnPcJe2lVlegOmJ8fK+iUFpn+UN/Dk1dI+dL0HAACwXPApLS1NHnjgAd/jL37xi5KUxLQVYCpvUPSGYqIOnGySg6eazefO0zvi79pQKjnp8XKyus33ddprBgAwdbJS4+WSFRPPOH19X40psVO1zV3y0q5K8/0S4mzS2T1Qsq3rPQAAQMj0fBochGpsbJRt27aZZuPLly/358sA8KPmYTveuiPe0d0r6xbmiT0mWhpau035x6r5OUE7Row9i+2tQ/VyrLJF7LZoWb0gd9yZEgDCR1Nbz4jHWq596Ypi2Xqg1vR8mpabLHNL0oN2jBibnl63vLqnWuqauiQl0SHrl+TThxEAEHnBpx//+Mfy61//Wh555BGZNm2avPXWW/LhD39Y2tvbze+vXbtWHnrooRHleACCLystTg6VDzyOc8RIcoJd7LYYWbeI3jKh5HB5i+w53tD/oEdMFtuNG2ZKnMOv+wwAQkR2WpxpXD7wuD/DKTUpdlIZVZh6W/fVyKma/mzkLqdLtuyslCvXTAv2YQEAMGaTHm3yu9/9Tn7605/KLbfcIpmZ/b1mvvCFL5hA0xNPPCFbtmyRjo4Oefjhhyf7UgACYHZRmiybnSWpiQ7JSYuXi88tNoGnsdJeIk++dkIe23JU3jpUZ7JvEByNbf0N4gdnsflrOhaA0LNmQa7MLEiRlASHyXBav3h8GwpHylvkj38/Jo+/dGxIGTamXuOwLLbG1qHrPQAAVjfp7fDf//73cu+998rtt99uHu/evVtOnDghn/nMZ2TWrFnmuX/6p3+SBx98UD75yU9O/ogB+F3/dMSsCU1Nem57uSkHULuPNUhinI0SjiDJSomXg9Lfv0tp6Z3edAKITLqRsH7xxHoC1jZ1yqt7qsS7nbDl7Uq5JnG6aVyOqZeREivN7QMBqMxUqgkAABEWfDp69KisW7fO9/j111+XqKgo2bBhg+85DUJVVlZO9qWAiKalE9o8tqOrVwqzk+TcudkSHaXDt4Onvcv
lCzx51bd0y9ygHVFkm1WUKu1dvXK0skUctmhZtSBXYh1jz2IDEByV9R2y43CdmUS3YFqG+b8cbA2tPb7Ak9KsVs22IfgUvCw2TSyubeqS1CSH6csIAEAo8UsjEA02eWmT8dTUVJk3b57vOS27i49nkgowGc9tK5fW0707WjoaTVbL0lnjz1byJ81ycthizJQ8rwxuTIJq6ews8wEgdDYWNr9VbgJPSrONkhLskpcR3Amjw4NMeqVH4Cm4WWwXTGKyLQAAId/zac6cOabBuGptbZU33nhjSCaUevrpp83XAVbT1eOSv79dKX959YRs3V8jbo9HrKjH6fYFnrzqmrsk2Bz2GNm0vND0i4p32GT+tHSZN42SOwDBV9/SJX/besr0pDtUNlCOajWNrT2+wJPqs8j6rsGv1fNzJSHOJknxdjOAIiOFUi8AABCkzCft9fSVr3xF9u/fLzt27BCn0ynvf//7ze/V1NTIX/7yF/n5z38u999//2RfCvA7nRZT09RpPtdyAs3iWzkvR6zGYY+WhFibdPa4fM+lJVljBzo3I0GuXz8z2IcBAD7dTpfJFvWWBde3VJtJniW5yWI1WkKl7z2DhzVYZX3XzQQ2FAAAgCWCT9dee60JOP2///f/JDo6Wr773e/K4sWLze/97Gc/k0ceeUTuvPNOue666/xxvIDf6IW+NlQdrKZx6ONA0t482w7USrfTLSW5SbJgesYZv1ZvTDYtL5KXdlVKZ7dLCrITzYQ6AMBIze3OEf3otFfOVAWfqho6ZPfRBvP5wpmZUpCVeMav1UCT9u/ZfrBOPH19smB6uhTnJE3JcQIAAIRUz6ebbrrJfAz3kY98RD7xiU9IevrQXbOTJ09KUVGRxMTQCBfBowGd5ATHkHI2LR+bClre97c3y0yvD6XZV9rDaXZR2hn/jE62IcMIAN6ZlolFR0eJZ1A5W8oUre86kUyngHpfu6a5S645b/pZs5lKC1PNBwAAQLiadM+ns8nNzR0ReFI33HAD0+9gCRuX9fcr0kCU9rdYOT8wJXenatrkuW1l8sKOCmlq65GOLpcv8ORV1TB1WVcAEO7Bp/MX5UusPUZioqNkbkmazA7ABDnNoN19rMFsJry2p9pkW2mG1eCgl36uzwEAAEQyv2Q+jdfgvgbjsXnzZvnRj34knZ2dcv7558uXvvQl2bp1q+kn1d3dLWvWrJH77rtPbLag/LUQgnRyj2YT6c/k4KmN/qSlfC/uqPCNrNbHV66ZJraYaHG5BxqcJyfYA/L6ABCJZuSnmI9A2n2sUXYcrjOfV5lJpE5ZMitzxNexvgMAgEgX0MwnfyorKzONzX/yk5+YJub79u2T5557Tu655x753ve+J88884wJQD366KPBPlScppk92qNIs30088fKAhV48mY0DQ636s64npsNSwvMrry+svb3WDRz5A0LAFhRZX2HWdt1WqiWmUXyeRhMS6hz0uNl6awsiY6KMh9LZmVJfuaZez4BAABEgpBJEXr22WflyiuvlLy8PPNYG5t7e0fNmDHDPKd9p374wx/KrbfeGuSjRa/LLc9sPWWaY6uymja5ZGVxRF6AJ8YP/W+mwabEeLvp/3HrRbNNSYb2JgGAUFDf3GV6GnmzmLW59rXrZkh8bMhcUvh3fW8aeBzvsElMdLQJOC0q7d9Q0AAUAABApAuZK0UNNDkcDvngBz8otbW1smnTJpk9e7bpK+WVk5MjNTU14/q+etMf6jf+MTHRQ361gvrWbunucQ+56K5u7JTiAE8asuK50DHV9S3dcqS8RaKio2TV/BzJSot/xz934GSTyZrSco2ls7NMmV6on4tg4VwMxfnAZFTUdwwpn9eJnQ2t3VKUHXkT2lbMzZHWDqdZ4+McMSaj1etMQSctt951tMFkwGqvwbklI3tjAgAAhJuQCT653W556aWX5Le//a0kJibKxz72MYmPj590+VRGRmJAS66mUkrKOwc0pkqUzSaxcfYhNyi5WcmSnp4
YcedCXbtxtvS6PCbQqc1v38nOQ7Wy/XB9/4NGkR53n1x9/sywOBfBxLkYivOBiUiMG9m/KCECs56UZntdtXa6yfbVDYKxXE9oqWJZbbv5/ER1m7jcfXLOjIwpOFoAAIDgCZmrxaysLFm7dq1kZvansV900UXy17/+dciFXl1dna8sb6waGzvCIvNJbyJbW7vEPaiBdbAtLc2QrftqRONPxblJUpQZL01NQ/tjRMq5GK+9R+rF2dNfsqgOnWyU+vnZppxjrMrq2qWupUdHLcnCmRnisMVIpAqXn4tIOR9TFaTGxMwsTJHKhg45XtVqyoiXzcmWjJQ4iWT2Ma6vnr4+KT8dePLSnojjCT7p99hzrEF63A0Sb4+S2YWpYbOJBgAAwlfIBJ8uvPBCufvuu6WlpUWSkpLk5ZdfNj2gHnroITl69KiUlpbKY489Jhs3bhzX99V+O4NHIocyvYl0uaxzIzmnKE1m5qeI290nsY6YKT3XVjsX4+WwR5sbDK9YW4z0eURcnrH9nU5Wt5nddUeszQSxKmrb5fLVJRF/gxLqPxf+xvnARGg52QVLCmT1/FyzeWO3Ub45nnOn74daqugVN86ssTf318qhsmbf+t7e2SvL52QH4GgBAABCPPi0evVqiYsb3y7pkiVL5M4775Tbb79dent7TRbUzTffLDNnzpS77rrLTLpbvHix3HbbbQE7boyfliFEcMLNpPqINLX2SGunU+wx0bJ+cf64/vyJ6tYhj2ubu6SzxzVquczZ6J87VtlisqbOmZ5hbpoAQLEeTMz5iwtky44K6XV7JCXBISvn5UxqfdfHEwk+Ha1skdqmLklNdJjehDRGBwAAIRV8OnDggPzqV7+S48ePy/e//3157rnnZNasWSbg5PXTn/50Qt9bp9npx2CrVq2Sxx9/fNLHDVhJUrxdrls/w0wLjLXHjDuzYPhNod5UOMb5Peqau+Svb5zyZWBpqciVa6eNu/E5AGBAYVai3LJplsl+0l5Z4y391/cEZ69nyOPx2nuiUbYdqPU9bml3ytqF42tbAAAAMB5+vYvcs2eP3HLLLVJeXm4+dzqdsn//fjOhbsuWLf58KSDsacBIg1ATKWlZUpol6cmx5nOdsLfmnNwx9yTx0ka43sCTlkseqWyRv++slPqWrnEfDwBggAbxdX2fSM/J8xbmieP0BkNcbIysXjAw9XesjlW0+D7XQNbr+6plx+E66RrUaxAAAMCymU//8R//IXfccYd85jOfkWXLlpnnvvGNb5jpdD/84Q9lw4YN/nw5AGeZwHTt+TMk2m6T7q4esY2jUbmXN1NKJxbqZKaeXrcp06ho6JArVpVIVhqT0gBgquWkJ8jNF5aKPdYhrp5e0/R9vLzZUjqFVRueR0WL7DraIMcqW+Wa86aLYwLZVAAAAFOa+XT99dePeF77NGlTcIQ/HTetU3h2Hq6XlvaeYB9ORNMd9fSUOIlzTCzGvGB6umSlxpmgk37oNCu9IdEsqOPVbX4/XgDW1tLhNGu7rvHO3oGG2Zh62odP1+SJNntfMS9H4h02aet0iua35qUnmOfbu3qlurHTz0cLAADg58wnu90u7e1DRwirqqoqiY8nSyLcudweeeaNU9LY1h902nei0fQISkvqL/9CaNEyvStWT5OyunZ59s1TQ8r2tAk6gMjR2uGUJ187YTJl1LGqVrlyDT3gQpUGrm64YIbsPNIgu4/WS8ygf0emFwIAgEDw6xXGxRdfLN/73vektXVgEotmPN1///2yceNGf74ULKihpdsXeFI6yed45dCpPJFOd5U1a0ADc5olFgrZU9Nyk2XZ7IFJStpLav709KAeF4CppcEmb+BJNbX1mKEEGFDb1Cm7jtab8mQtV7Y63VBYPidLCrKSfM/NLEiR/MzEoB4XAAAIT37NfLrnnnvkQx/6kKxZs0Y8Ho/ceOON0tbWJvPnz5e7777bny8FC4qJGdl5wsYO6pDA0xOvnjAlbOpoRYtcESKZA+f
OzZHSwlRxujySkRwbEscMwH9so6zvg7NlIp32TXpxZ6Uv6FTb1CVrz7H+9LiY6Gi5dFWxNLZ0m80GzYgCAACwfPApKSlJ/u///k9ee+012bdvnwlAzZkzR9avXy/RE2h4jNCSlRovs4tS5XB5iy9DZm5xWrAPyzK0kas38KQ0S6ymsVO6e91m5LaO37ZyiaKVjw1AYOlarpms3uzW0oJUyWHogM/BU81Dsp0OlzXLkllZcup0f7wZ+SkSe3pCnRUnqzJAAgAAhFTwST3++OMSGxsrH/zgB83jT33qU6YM75prrvH3S8GCzluYL3OK0sTl6TPNqsmQOXtm2I7D9dLQ2m0+3xkdLZetKuYmAIA1e8CtmSb1Ld1iiyZYMZxmDQ3m6RN55vWT0tbVax7vO9kkV62d5psyBwAAEGn8Ghn49a9/LV/+8peHNB3Py8uT++67Tx555BF/vhQsTG9K8jISQj7w5PZ4TO+OQ2XN0tXjmvT3m12YKumDsof0HNW3DPRMcXk8sv9U06RfBwACQdd0XbfCIfDU2e0ya7tmpOoEz8laOivLTKBTGoYqzkn0BZ6UTpUrqxk5kAUAACBS+DXz6Te/+Y08+OCDcuWVV/qe+5d/+RdZuHCh/PjHP5ZbbrnFny8HBIzejDz7ZrnUNPWPnH77qE2uWjNdEuIm/l/GYY8x0/+01E57paQk2OX3Lx4dUf4AAAhs/72nXjspXc7+TYWjFYly0YqiSa2/malxct35M0wma1K8XZrbe6S8rmPI10SF9n4MAADApPj1Uqi2ttYEmoZbunSpVFZW+vOlgIDSoJM38OTdJT9S0d/LarKZA4XZSSZ7ICHOLgumZ/h+L84RIwtnZk76NdDP09cnJ6vbTPZaj9P6kwUBTI3D5c2+wJOqbOgw5YSTpZsTxTlJpt9hSW6SZA/KENMydJ0cCv/odrrM0A5d43WtBwAAEZb5NH36dNm8ebN84AMfGPL8li1bpKioyJ8vBQRU1Cg74MNaevjFynk5UpKTZG6EctMTJD7W723YIpLejDy/rdzcVKpdRxrkPVctCPZhAbCAKFMYN/w5/0+R0x5+lfWdphF5YXaieQ6Tp5tBT75+wvyqjle3yk0Xzw32YQEAgHfg1ztdbTJ+7733yt69e2XJkiXmud27d8uTTz4pX//61/35UkBA5aTHS0Fmoi94oWUUpYWpAXmt3IwEmUrav6q6sdNkWuVnJko40jHn3n87b5nNvuONUpqXFNTjAhB8c4pT5XBFsy94odlKmpnkbxps0u89lbTcr6mtx0wn1QyscHSofODfTmkvLS1njyW2BwBA5ASfrr32WrHZbKbx+HPPPSd2u11KS0vlhz/8oVx44YX+fCkgoLT3h/YA0Ytat6d/1zocphS1djjl6TdOSvfpMrS5JWmyZkFesA8LAKaMljxfc950qajvEHtMtBTlJI2a7RpqtARty9uVJtNK/z7rF+fLjPwUCTujVNlReQcAgPX5vcZHm40PbjgOhHIAalpeePXo2Hui0Rd4UgdPNcvCGZkmsytQOrp7paGlW5ITHFO2E6+Za1rG6O3blRBvk/nTM8TZ7ZyS1wdgbXEOm5QWBCabNVh2HK4zgSelv751qC7gwafG1m6TWZqVGj+pgRzjMbs41WQ/eafQFmQnmgzilpaBPo0AAMB6/H6lUFFRIW+//bY4nSNv8q6//np/vxyAcfDemLzTc/6iwZ/ntpWLy+0xPVXWLsyT2UVpMhWBw0tWFsnJ6nbz2jMKUiQx3k7wCUDY8gxbygPdiHvfiUbZdqDWJCI5bDFy6cpiM/Uv0BJPZ66V1baL3RZtSuKjA9GUEQAAWDf49Mgjj8hXv/pVcbtHTpbSFHCCTwhnGuSoa+4yZRxZg6YcWcm8knQ5XtVmjlXp9CXNSAqUHYfqfa+lNyhv7q+dkuCTt9/KzIL+XX+bjWYgACZOs2wa23okOcEuKQFcMydj4YwMeW1v9aDHgZue6vH0ybaDdb4KOKf
LLW8frZdNy6dmuIwO55hT3P9eQuAJAIAIDD799Kc/lVtvvVU+85nPSFISjX0ROXqcbtNLqaWjP7NGL4rXnmO9XkoZKXFytfY6qWs3ZSfT8wNbVjh8510fe/uRAEAo0E2FZ7eVSa/LY9audYvyLFmyp+87KYkOaWjtlozk2IAOlDBr+7D1XfsjAgAAnIlf0wHq6urkjjvuIPCEiLP/VJMv8KQOlTWbXhhWlJrokAXTM0xWkJanTWSaUm1Tpy+j6WzmT0sfMsLcPCbwBCCEaO8kDTwpDZ5v3VcrVpWXkSDnTM+YUOBJA0r1zV1jeu+yxUTL7NOZR0rXdc2sBQAAmJLMp/nz58uRI0ekqGhq0q4Bq3CPEohxheEusN6E7T7WYD7XUd6XryqRWMeZpwBqs9uEWJvUNHWZoFe4NXAHEP6GB9rdnncOvIca/Ts9+2a5b0iDrt06Le9smwVrFuSaYFdbZ6/kZSZIjkXLzQEAQBgGnz70oQ/J1772NSkrK5OZM2eKwzG0L8LKlSv9+XKAZcwsSJUDp5p9NylZqXGSmTI1k92mSmuH0xd48mZAacPZZXOyz/rndAqRfkwFPf8t7U4TEAvkBD8AkWNucbrUt1T5Hnt7DYWToxWtvsCTOl7VKrMKU6Ug68wZVBqYCvQ0veHl7W1dTkmOd5x10wMAAERA8OmTn/yk+fX+++8f9SJl//79/nw5wDLSk2PlqrXT5ER1m5m+M6cozTS8Diejldn1jqH0biobAv/tzTITFNO9+nPn5ZjyEwCYjFlFqRIfGyO1TV2SkuSwZL+nyRptLfeWGlqBlno/v73CNDbXoR6bzi0yWVcAACBCg0/PP/+8P78dEFK0DG3prFhzwd7e1StJ8Tax22LC6u+nGV31Ld2+CUMzp3DX+51oVpYGnpQWPG4/UGuOT6ciAcBkFGYnmQ8Ncus6o1PvwmmDQSef7jrSYII7SjNHtZTOKt7YV+M7Ng2U6ePrzp8R7MMCAADj4Ne7ssLCQn9+OyDk1Ld0yfPby6Xb6RaHLUYuOrdQctKtcwE/GRpsunRliew/2STOXreZlJeVap0eH8N36ftOPxcfXtWPAILkcHmzvLa3xjQd12D8pSuLwya4rcEmzd49UtFiBlHMLUmTWLt1Nk+cw9Z3fQ8CAAChZdJXTRdddJE8+uijkp6eLps2bTprc0oyo2BFupOtNxMJcZPvEaS7sRp4UrpLqzcq4bQ7qyWFi0szxYq098jRihYTdFLa/DYpgb5PQKTSdb2j22XKtCbbI0iDHa+fDjwpzX56+2i9rFmQJ+EiJdEhy9+hh1+waBbrrkE9B3VaKwAAiLDg0w033CBxcXG+zxmjjlCy7UCt7D3RaD6fnpcsFywpmNTPsDfwNLhBKqaGNsa9ZGWxnKppNzea2u9Jd/ABRB7tUffctv7pbbqmr5ybLfMn0QNOM288pwNPXqzvU0cHW+hmQkNLt2SkxMnsovDruwUAQLibdPDp4x//uO/zT3ziE5P9dsCUNjD1Bp6UNgsvymmdVDNZDWDtPFwvMTHRonEPLU3D1MnPTDQfACKbTuL0Tm/TbKU3D9RKcW7yhKdgJsbZJDMlznxPm/Z60vU9j+ybqTS7KE1mFwX7KAAAwET5pVumy+WSF154Qbq6unzP/d///Z989KMflS996Uty9OhRf7wM4FddPe5RS/AmqqmtR45Wtphm401t3TKvJE1WzsuZ5FECACa7vmvOUrdz4uu7bk40tHZLc1uPdPa4ZP3ifJmWx+YCAADAlAWfGhoa5Nprr5WPfexjUllZaZ77yU9+Il/96lelsbHRPHfLLbfI4cOHJ/tSgF/lpMdL3KA+IDHRUVKYlTTh7/f63mpzw5OWHGvKAmqbuylDhS/zgga5wNQpyU3S5CSflASHaRI+Eb0ut7yyu8p8npUWLwlxNmls7Z+sCbg9HlPmCQAAAlx29+Mf/1hsNps8+eSTMnPmTOno6JCHH35YVqxYIb/5zW/M1zz44IP
ywx/+UH7wgx9M9uUAv9EpRZevniZ7jjWYXh7zStIlPXnio9F0N3ywru6J77IjfFTWd8iWnZWmAX12WrxctLxo0s2PAZydlt9uOrdIjle2it0eLYtnZoktZmL7bT29HnF7+s663iMy7T7WIDsO15sNhjnFabJmQS6bTgAABCrz6cUXX5R77rnHBJ7Ua6+9Jt3d3Sbbyevyyy+XN998c7IvBfhdaqJD1i3Kl/WLC0xgYDKKsodmTRXlhHfvIb3Y1h1fnJkGNb2BJ1XX3CVvHa4L9mEBEUHX5PVLCsxEOs1Wmij9s8M3JgqzE8N+7RreYB1D1et6fqjONwHxUFmzKc8EAAABynyqra2VGTMGRslv377d7PqsXbvW91xOTo60t7eLv3zzm9+UpqYmk1F16NAh01eqtbVV5syZY55LSEjw22sBY7Vyfo4p49O+IBnJcbKodOKTlazuZHWbKUPRUgPte3L+4nyJ0Sa8GKK31+MLPHl1dPUG7XgAjJ9OzbxkRbHsPFIv3T0uKclNntRgilCYArvvZJMZmrF8dracMyN838smo2OU7OaObtZ3AADOZNJ3iykpKdLS0uJ7/Prrr5ssqKysLN9zx48fl4wM/1y8aGbVH//4R9/jz3/+8/K5z31OnnnmGSktLZWHHnrIL68DTOQGZcmsLNm0vEiWzs7yWzDGu6tqFdqU/e+7KqXX7TFNfHWnd/+JpmAfliVpeV1WatyQ5wqzwjtjAgjXMu215+TJhcuLpLTQf4Enq63vp2razBRYPS6Pp0+2Haw1GT4YKTstThy2mCHXAHkZbH4CAHAmk747XrNmjfzP//yP+VxL6/bv3y+XXnqp7/c9Ho/853/+p+kBNVnNzc3y3e9+10zRU1VVVdLW1iarVq0yj2+++WZ56qmnJv06gBX09Lrlb2+WyW/+dkj+8Pdj0tDSLVYJPulNyWBtZPOc0UXnFsnsolQpyEo00w/nTyeLAIh0ja3d8se/HzPr+9+2npIepzUGEoy2lrO+jy4hzi6XrSo2mXBa4rlpeaFkpU6ufB8AgHA26bK7T3ziE3LbbbeZAJA2Gy8sLJQ77rjD/J4Ggn72s59JeXm5PPLII5M+2C9/+cvymc98xgSdVE1NjeTm5g4p79PnxiM6Osp8hLKY001Uvb9GsnA6F2/sq5Gaxk4zsUlLtba8XSm3bJoV9HORnhInSQl26ewaKDkoykkSm8265zyYPxdJNodcsLRQrCSc/p8AoejFnRXS1tkf1Klq7JStB2pM78Fgy02PN60TvBlZ2qR9ePYmBuhk2wuXWWt9BwAgbINP2u/piSeeMGVvesFy1VVXmVI8VVFRISUlJaZHk5bETcbvf/97yc/PN72k/vCHP/iyqoYb75SRjIzEkJ9MsnVftbx9+IhJ+V63pEDmTSOzIiUl9HcfnR4RR+zAf9FeT58kp8SPe2JTIM7FrZfOl9d2V0q30y1zStLlnJmZEgrC4efCnzgf1tbr8sjf366U+rYesUWJrFuYL5kEAkKeNvJuPx148vIGooJNM3c2LSuUfScaJSo6SpaUZkpygiPYhwUAAMLApINPKjMzU26//fYRz995552jfv2PfvQjee973yupqWPvm6BZVHV1dXLdddeZHlOdnZ0maKTPeenneXl54zr2xsaOkM580v4Mr+yoELsjRnqdbnn65WMSGyWSNmwyT6TQTA69oW5t7RK3O7QnsaXEx8iJQeO8s9LipK21yzLnYs38HN/nTU0dYmXh9HMRCecjPZ2+WEobXGtzfw1Cd/S4ZPNb5XLTxtKQ3zCJdLpRpNNVawf1UtKMI6vQTFb9AAAAsFzwabx+8YtfmCDSeIJPv/zlL32fa+bT1q1b5YEHHpBrrrnGfK5lf4899phs2LBhXMeivWuG968JJfXN3b70eNMgtK/P9AZKirdLJNMbapfLejfV47FoZqa4XH1S1dAhyQl2WTU/d0J/p8Hnor2rV17aVSkt7U7JSY+XdYvyJdY+0DA
13IXDz4U/cT6srbmtZ8jjzh6XOF2eiPo/G642LiuUNw/USmuH0zSpXjY7e9Lfs7K+w5Rra8bczIIUOXduNoFKAAAQ2cEnf053+fa3vy333XefaTxeVFRkHkcSDSCYpkCnxURHSUZKZGY9hePuuN48iEz+psRry84KqT/duLystl227quR9UuC32cEwEjZ6fFS3djpe5ya6CDwFEbT8y7w49qrgyBe2FEhrtOZjDqxLiXRIXOK0/z2GgAAACEXfJqsG2+80XyoOXPmyO9+9zuJVLpjev7ifDle3S49PS76M4Tw5KPX9lZLV49bSnKTZMW8HBN88remYZkUwx8DsI7FpZlms6ahzSnRUX2yat7AgA2EjmOVraaEUi2emSmzisae9T1WOpHOG3jyYn0HAABWEpLBJww1uyhNVi0qNH13KKEJPW6PR57bXm52rtX+k01mV1zL7vxNmxXXNg30GaF5MWBdGoBeOT/X9MBifQ/djYWXd1f5Mr5f3VMlKUkOyUnzb4+n1ASHOGwx4nS5fc8xpQ4AAFgJc7aBINNsJ2/gyUv7dgXC3OI000dGe4MkxNpMHykAQGA0tvYMaTWgnzUGYH2PdcRIcW6i1DR2mo8Z+clSWuj/DCsAAICJIvgEBFl8bIzEO4YmIWak+H/HWpuNv7qn2kxCLMhKNM2LB09bAgD4V3py7OC2jObzQPRlPF7VKkcrWiU3I8F8lNd1jNjUAAAACCbK7oAgi4mOlovOLZJX91ZLd49LinOSZOHMDL+/jvb/cA+b7Fjf3CWFWaE11l5vqHSiU2tn/5Qobcqu5xAArEZLm3WqqLfnk5ZT56Qn+P11vIMkvHTinU7S0xLuUKIbIm8drJNet0fmFKXK3JL0YB8SAADwk9C6KgHC+AblmvOmB/Q1dFKW9pDxDCoB0V35UKMTnepOZ2xpQE1Hia+clxPswwKAUWn5W6BL4NKThq7lOvk2OcEuobax8Py2cl/fqtf3dUtCnN1syAAAgNAX0OBTb2+vHDhwQGbOnCmJiQPZFffff79kZWWJlWmPhrePNpjeOIlxNnNzqxdBQKjSsds6GXHbwVrxePpkXkm6lOQmSyjRwJlmaw1W2zQwih4Yix6nW948UCstHT2Sm54gy+ZkkT2HkKYT9Jrbe+RIRYvYbdGyen5uyF2z6PEPbpjuXd8JPgEAEB78GnyqqqqSL37xi/LpT39a5syZIzfddJMcOXJEUlNT5b//+79l/vz55uuuvPJKsbrdxxrl7dNp8nUi0tLhlGvXzQj2YQGTMiM/xXyEKs3cSk5wmJI7r9TE0MveQnD9/e1KqWzo8JUraVCT5vsIdSvm5ZiPUKVre3R0lNkc8WJ9BwAgfPh1q/eBBx6QtrY2ycjIkKeffloqKyvlf//3f+WSSy6Rb33rWxJKqhuHZlNoeU9P79AdOQBTb+OyQklLijU3KQWZibJyfmjebNU0dcpz28rk2W1lUnU6EIKpUT0sW274eg9g6iXF2+X8RfkS54gRW3S0zJ+WLqWFoblZcqisWf629ZRs2Vlhhn0AAAA/Zz69/vrr8qtf/UqKiorkP/7jP2T9+vWyfPlySU9PlxtvvFFCSUqiXaoaBh7rxZDDRlkGEGzap+q680M7C1EbAT/3Zrm4PB5f8OPqtdNDsgdXKEqOt5tsVq+UBEdQjwdAeGTnqmOVrfLa3mrf44bWbvOeRWkvACDSRfu7x5OW2Gm/pNdee03OO+8887zH4xGbLbR6my+fnW2yKnQsckKcTS5cVmgaGwNWdvBUk8mmeW1PNWO2LT7RyRt4UlpmUkP2zZTZsLTQlPjoip6dFi+rF1ByB2tzuT2y/WCtWd91ct7gwRGwluGZrG2dveYDAIBI59eI0IIFC+TRRx+V7OxsaW1tlQ0bNojT6ZT//M//lHnz5kkocdhj5JKVxeYCT/vMAKGQ5v/6vhrf46b2Hnnf1aG9gxzOmTfDJYXYZKpQphlmN14wk/UdIePVPdVyvKrVfF5
R3yFuT59cvo5G3FaUOGx918mD8bGhtQELAEAg+PXd8J577pGPfvSj0tTUJHfeeafk5eXJv/7rv8rzzz8v//Vf/yWhiBsThAq9IRmsoaVbOrrJfrKaXpdbymrbJSYqygQIUxMdcs6MTCnK5kZyqrG+I1RU1A1d33UNgfVoOW9bh1O6nW6T0arr+7pF+RJrjwn2oQEAEF7Bp8WLF8vLL78s7e3tkpLSn3Hx/ve/30y/S0tL8+dLARgmYdjOqs0WbXqV9XYH7ZAwihfeqpCq0yV2yQl2OXdujpwzIyPYhwXAwhLjbOJsHxh6khRPJo3VaKn7X984JV1Ol3nvVZeuLJastPhgHxoAAOHX8+nmm2+WRx55RKIHNVWcMWMGgSdgCiydnSU5py9ybTHRsmFpgfk1ELSv2/4TjfLS25Wy51gD/UfGqNfl8QWelPaRG56xBgDDnb8437fBoEHr8xbmBzQ7U/tL6fp+srotYK8Tbupbuk3gabBKJpkCAODj162zNWvWyE9/+lN54IEH5KKLLjIT7tatW0ejbmAKaFr/FWumSY/TLXZbtDhO77wGwo7D9bL72OlxkFX9DVXXLswL2OuFC1tMlPm30SCUV3ws5RgAzi4jJU5u2lgqPb1us9bbA1TGpRsLz20rN0MR1LGqVrlgSUHIT6CbCqOt5fR6AgBggF/TIu666y554YUX5Cc/+YnExMTIJz7xCdm4caN897vflePHj/vzpYCgTLDR8clvHaozNwBWFeuIkejowAZ8TwzbDT9Zw+74WGggXm/kvBlpaUmxcu6cnGAfFhDxDpc3y6t7qmTfiUYTgLHq+hHnsAV0Q09Lx7yBJy+yn8YmKzVelszKMlM01fS8ZCktTA3yUQEAYB1+35LRiyLNdtKPrq4u+c1vfmOCUQ8//LAsX77c9IC69NJL/f2yQEBVN3bKs9vKfTclFXXtcuXaaRIzqMQ0ksQ7YqRtoHrM198C70wbi7970ywTwNRdcZpeA8H19pF62Xmk3ve4tdMpaxZEZian3da/eaHNsr1Y38du6awsmT8t3Zw/sp4AABgqIHfOtbW1Zrrdu9/9bvnOd74jCxYskK997WumLO9LX/qS3H///YF4WSBgTlS1DtkNb2zrkZZ2p0Sq1QtyTfCktqlLOrtdsvacyLxRmyjNfEqMsxN4AizgeFXr0MeVkZvpo2XBq+fnSnNbj1nfda3SbB6MnZZFEngCAGAkv747/ulPfzIfb7zxhmRkZMj1118vP/jBD2T69Om+r8nPzzfBpy9+8Yv+fGkgoIaPSY46Xd4WqcrrOsxNSlqSw9yc6Njv3IyEYB8WAEx6fY/ktd1bZpecaBe3u09cLrdppF2ckxTswwIAACHOr8EnDShdeOGF8uMf/1guuOCCIVPvvGbOnCnvec97/PmyQMCdMyPDTCVraO02gaflc7NN5kqkOlrRYrJ2HKdv2o5UtMiKeTmWPl5nWYvE26OkKIubKAADVs3Plee2l0m3DmuIiZa15+RKpNKeTzqhTUvKvcNSdf20avDJ5fbI/pNNYo9tlawku+mjBwAAItzQAN0AADbXSURBVCD49Pe//91kPJ2N9n3SDyCUaJDlyjXTpKXDKQ57f8lUJNNzcLbHVqIN4vcebxRHrE2cPS4TODxn+tnXKQCRIzM1Tm68YKaZ2pkYbx+RCRVpEzm1d+fgMnPNcrUiz+nJfHXNXWZ9d/W65ZKVxZKTFh/sQwMAAIEOPmng6cCBA3Lo0CHxePpHiesFjNPplN27d8s3vvENf74cMKW0CWt6svV2VfX/2LHKVmnr6pX8jIQpKX9bOT9Xnt9WLk6X25TdrbZwc95DZc1DHh8uayb4BGBEo+2MFOsFnXpdbjlU3iJut0dm5KdIcoIj4Odh1bwc2bq/RjT8lBRvl6WzrdnzSfsu1jR1+nrnaZPvYxUtBJ8AAIiE4NMvf/lL+eY3v2k+H7xzpp+vWLHCny8F4LTX99X4Aiy7RGTD0kKZlpcc0Nf
Ui3vNFGjpdEpyvN3SzVV1177X1R8MVxosAwCr05KyZ944ZQZcqL3Hm+Sq86ZJSoADUPOmpUtRTpJ097gkNSnWsplPox0X6zsAANbl13fp3/72t3LnnXfK22+/Lenp6bJlyxbTgLy0tFQuuugif74UgNM7vZrJ46Xh3oOnmqbktbUprwahrBx48vZziYmJ8t2srPRzbyoNsmtPlD3HG6Tp9E0iAEyWTpvzBp6UZprq5NWpoBlPWWnxlg08eY9x8cxM3+OURIfpz+hPOtV1/4lG2XeiUbqdLr9+bwAAIo1f7xqrq6vl5ptvltjYWJk3b54ptbv44ovl3nvvlQcffFA+8IEP+PPlgIin1QZR0VHS5xnozxHDzu8Q2ij3XRtKdUtcoj0e01DYn17aVeUb1b7zcL1cqj1H0pn8B2Dy/ZdGK//GgGVzsmVWcZrExTvEJn1mIIi/OHvd8tRrJ6W102kea2Pzq9dOj/hpiAAATJRf78ISEhLE7Xabz0tKSuTIkSPmc818qqio8OdLAThd0qr9ObwX3A5bjCyzaH8Of2lp7zE3AToOfHBT3LPRJsJFOcl+z9Lq7O71BZ6U29MnB08N7TEFABORnRZv+jx5ac/BucVpEs6qGztNplFtU+eY/4yel4LsJL9naZXXdfgCT6q9q1dO1rT59TUAAIgkfr0T0yl2Dz/8sHz5y1+WBQsWyKOPPiof/vCHZfv27ZKYmOjPlwJw2tySdNNkXC+MM1PiLF8GNxk61ehvW8vEdXqgwazCVFm3KD9oxzNaFgKZCQD8tbmwfnG+zC1JE7e7T3LS48O6p5EGnbYeqDWf6yqqa3tpYWrQjidmlLV8tOcAAMDY+PUq5rOf/ay89NJLpvfTVVddJfX19bJq1Sq555575MYbb/TnSyHM9TjdcqSixWSVuE8HGnBmaUmxUpSdFNaBJ6UZT97Ak9Kfka6e4PXhiHPYZNGgniNxjhhZOOgxgJFaOpxmSEJlfUewDyUkAlC56QlSkJUY1oEntft4o+9zzWndfawhqMdTlJNozr1XVmpcwId5AAAQzvx6pzpnzhx55plnpK2tTaKjo+WRRx6RJ554QvLy8uTyyy/350shjGkw4anXT5pMHqUXf9pHh4wSjPYT4B2zHSzL52SbyVBd3S6TmRDuAUBgMmoaO+XZbWWmRFVpw2jt2wNEWWxtj4mONtcelQ0dJhqWn5VgngMAABPjl7uk9vZ2+fnPfy5PPvmklJWV+Z6fNm2aXHvttbJhwwZ/vAwixNHKFl/gSdU0dZo+ELrz2+vySEV9u0RJlNmV5EIwsmhWkfbh0KlP6pzpGZZo/qpT/wC8sz3HG32BJzmd3bJ4VqZZy3Xd1/U+Mc4ueRk07Y80GoR8dXeVyXrSwNOSWcHvX6ibXppVDAAALBB8ampqkve85z1SVVUll1xyibz73e+WlJQUk/20d+9e0wPq6aeflv/93/+V5GTSlTFxvS63PPX6KWlu7x89TUZU5NHGsteeP11qGrskPjZG8jPpJQeEg8bWbnnmjVPS6+4vq9VyVs0qROTQHn4ZybHS1N4jGclxZr0HAADhY9LBp+9///vi8XhM1lN+/sjGv9XV1XLnnXfKL37xC/nUpz412ZdDBCgtSDUTwwaX3eVlJsjhsmZf4EnpDnlFfYcU54T/rmRHd6/UN3dLUrxdMlPjJJJpVsTMAnuwDwPABCyckSFVDR2+7CfNZtSsp13HGnyBJ29G1KKZGWK3BT+zcSoCb22dvWZt1zU+kmWkxJkPAAAQfiYdfNqyZYuZbjda4ElpvycNOn3rW9+adPDpl7/8pTz22GPm80WLFslXv/pVOXHihHzpS1+S1tZW03PqwQcflIQE0vVDmfbMuXrtdCmrazeTZUpyk0wKft9ApYZP32hPhpl6nfD2ZpnvxmzF3Bw5Z0ZGsA8LAMZNJ3Nec950qWnqMoEWLac2Rl3fJewdPNUkb+yrMX99bSh+8blF5hwBAACEm0k3zNGJdhr
0OZt58+ZJZWXlpF5n165d8oc//ME0Mf/LX/4iLpfLlPJ9/vOfl8997nOm0Xlpaak89NBDk3odWIP28dEU/Bn5Kb6+TtPzk4fsCuvuqO/GxaJBo4q6dlMuOBlvHx2aEbD9UJ14IuGuDEBYSk2KlTnFaUPW7/nT04eMsZ9bnCYOuzWznnQCq2ZvaS/CyW6AbDtY54u7udwe2Xmk3i/HCAAAEHaZT729vRIXd/YUaf19DRZNhvaRuu+++3xZTRrQOnjwoOkttWrVKvPczTffLO9973vlrrvumtRrwZp0rP1Va6fJyWqdphgl0/NSLDt6euv+Gtl/ssl8rgGzK9dMm/AUNM+g5rxGX1//DU+QJwEBgL9oefXV502XqoZOSYi1WXakvQaINBO1rrnLPNZm1JuWF0rUBNZjXceHbySMWO8BAADCRMjMBJ8+fbr5UA0NDfLb3/5Wbr31VlN255WTkyM1NTXj+r4axAj1htUxpwMw3l/DWZLNIefMzLT0uWjtcJqeVd4x0Z3dLjlwqklWzs+d0PdbWJpp+lt571E0QyDW8c7/da1wLoJNpyM2tHZLQpxN0tMTI/pcDMbPBqwoLSnWfFjZscpWX+BJlde1m96DE5mIpgGrBdMyZM/xhv7Hp9d3jE1Xj8v0gdQs6HROGwAAkRF80mbi8fFnHjXe2dkp/lJeXi4f+chHTJbTihUr5IUXXhjy++PdfczISJzQjqUVpaQw7t0K58ITHS2OYVlOcfEOE/yYCP1zudnJUlHbLsmJdpldPL6r7Ej9uejs7pUnXzjia1K/Yn63rFk4em+6SBWpPxvARLkHlUAPPDfxbKVz52ZLVmqctHU6JSc9XnLS6fc0FlryuHl7uSlJt9ui5doNsyQtPmT2UwEAiEiTfqcuKCiQp59++h2/7kwNycdj//798uEPf9h8aHmd9pGqq6vz/b5+rg3Ox6OxsSMsMp/0JrK1tWvUC+NIYolz0dcnGUkOqW7oD7rGxERJQXq8NDV1TPhbOqJEZuT2B6/G+n0CdS56nG5p6eiR5ATHhEsJp6r0sbah/1xpgHnb/hopzkowJT2RzhL/T85iooFaINC0HHD3sUbpcva3EkhJcEhB1uQCRlYqMdQps909LklLjrVsWbt680CtrxeiBv9e2lkh16ydFuzDAgAAZzHpu7DNmzfLVGhsbJQPfehD8pWvfEUuvfRSX+BLM662bt1q+j7pJLwNGzaM6/tqf4Vw6bGgN5Eul/VuJCPxXFy4rFAOl7eIs9ctJbnJkproCNrx+PNc1DZ1yvPbK8TpcpsbE/17WrXpuwbJvP1UvLdQTqdbHBa+oYq0/ydAqEmIs8tV502ToxWtorMwZhemid1mzcbo47X/RKMJ6uiqqZsLl68qNn9fKxq+bmmJNQAAsLaQSQH41a9+Je3t7fLjH//YfKiNGzfKt7/9bdOIXBuPFxUVmcdAsGlgZv60sZfH6YWzll0kxtnNpD+r2rq/1gSevI13dUT4DRfMFCsqLUyVIxUtvuByQVaSpCQ5xDOJEhkA0HV6cemZew+O1ptIP1ISHZbNJurpdcubgybv6fvR20caZO3C8WWTTxWdlrjtYK3v8dl6QQIAAGsImeDTZz7zGfMxmt/97ndTfjyAv2hPomffLJPOHpfYoqPlgqUFUpwz/ua1U8EZQrvNOWnxcsXqEjlV027KA89bViRtrV3i8d1eAUBgHa9qlZd3V5kguE4+vXRlsckqsmIWpJmiOoi3rM2KzpmRYc5nfUuXZKXFy4qF+ZMqbQcAAIFnzS04YIoaUmt/i2DbdqDWBJ6Uy+ORV/dUiVXNKkwZ8rh02GOryUqNl+VzsmVRaaZlMw4A+JeW27Z2Ok22UTBpMOfVPdW+7Et9v3nr0ECfSivRAH1+5kAJtXbCnJlv7fVde2WdOzfHZLkCAADrC5nMJ8Dfzaj3n2wyn0/PS5YLlhQEbeqhljsM5uzt34G24hTGxaVZpuSkvqVb0pNjZXYRF/0ArEO
zMZ/bVia1zV0mgLJiXo4smJ4RlGNxe/pMefLwXnRWpO83m5YXmvdFDdpp9u3gYBQAAMBkEXxCRI5o9gae1InqNinOaZOZBcHZ5Z2Rn2KCOV4aDLNi4MlLd5mtvNOsgTu9edImwDqCG0Dk2Hu80QSeVN/pzFLNkNGg+VTTbMuSnCQ5Vdvue25GkN5nxnq8iyzeO0mzyHTSYJwjRmK04zsAAAgZBJ8QcbpPj8h+p+emiu7Ka5Px2sYuSU60B22XPhz0utzy3PZyqW3qkujoKFl7Tp7MsnCgDIB/DV/L+05nGwUj+KS0h9/e403S0dUreZkJZrMBE9PU1mOy2rRMPd5hk4vOLZLM1LhgHxYAABgjto0QcXLTE8yuqZc2+S7MDm6D79KCVDNVaOGMTIm2cNaT1e0+1mgCT94d8tf2VA+5GdWsKH08vLEugPCgWU6DV9DURIekJgWvwbdm5+hkPF3fCTxNzmt7q339ETX7aXh/RLfHY9myRgAAQOYTIpA2Vr1i9TRTnqGNaeeWpJkbFIS+zu6hWQ/676s3I0kJIvXNXfLoC0ekvbPXTEm6eEUx/+5AmNE+RZoRc6yqVWLtMaaMjPKs8DC8gXxXz0Cg6eDJRnnm1eOm55f+DGj/KoZMAABgLbwzIyKlJDrMTvS6RflmIhqCQzOQhjfknQxtkjtYWlKsb6z582+eks4ul2/qlGZFAQg/msm6fnGBrJqfazYbEBwa/NdspECt797HGpTavK1M3O7+jNaqhg7Zc6zRb68LAAD8g6syAEFRWd8hf3+70kz7y8tIkAuXFYrDPlAOOdGSG51ceLK6zfTRWjory/R+8gacBuvoHvoYAOAfe441yFuH63WHQeZNSzeBwMnSyYVxDps0tHZLRnKsrzl6t9NtJgsOxvoOAID1EHwCxriDSy8m/9F+TC/urDAlEt4JhDsO18vqBZO/QdG+KqP1VtHA1K7WgamCRUHu8wXAGljf/auuuUu2H6rzPdbpstprUdfgydB/I+2fNVxKol3SkmOldlBZHus7AADWQ/AJeIcgycu7q+REdZs4bNFy3sI8Kcmd3AU0xGQ7eQNPXsMzk/xt47nF4nF7pKG5y5RajnYTAyByaHbMC29VSGNrt6QmxcrGZYX0gfOD0dby9gBmImlPr+svKJXNW0+avn/T85MnHegCAAD+R/AJOIt9JxrleFWrL2CiZWI3b5xlSrowcTptMD051ozO9irISgzoa2rz2ZXzcsQ1LOgFIDK9uqfalHCp5vYeeXlXpVy1dnqwDyvkZafFiz0mWnpP9/PTjKW89ISAvmZSgkPWLylgfQcAwMIIPgFn0dLhHPJY+0robvlEg0/a5+iVPVVmApvuzK5bmO/rSRRJoqKi5JIVxbLtYK10dbukMCdJ5k9LD/ZhAYggrcPW97bOyWXn7DneILuONJj1bemsTJk/PUMikU4TvWRlsew62mBKGhdMS5fM1LhgHxYAAAgygk/AWeRlJsiRihbf44Q4m5mUNxG9Lre8uKPCtxt8rLJVUhNjI7b8S6dQ6UQqAAgGHXQweH3PzZh4do5OWNt+cKDP0dYDtZKZFi85afERm/100blFwT4MAABgIQSfgLMoLUgVZ6/HlN7F2mPMtB0t35qIjm6XL/Dk1dIxUHYWCvTm6nB5s9ht0WZ60fDR1wAQKnTAga7n9S1dpgxYy3L9lUVlnmt3hkzwydnrNv0NdfiD9r06f3EB/a8AAIBfEXwC3oGWg/mjJExLETRzShuieukEoFBxtKLFlJV4+19t2VkhN1wwUxLj7ME+NAAYNw08+WPCpjfTR8vt+vr6zGMtpw6lUrM3D9RKWW27+by+pdtk6V53/oxgHxYAAAgjBJ8inAYUdh6pN58vmpkpc4rTgn1IYX2jo32Otu6vlR6nS6blpYTU+R7cHNzb/0p7pBB8AqxHgyAaUDhZ3WZ61K09J88ESBAYGSlxsnFpgew+1h+gX1KaZbKpQnV9b2nvMT9DGlADAAD
wB4JPEUxLDV7ZXSX9+7Qir++tlrTk2JApEwhFaUmxcunKYglFw29cdZoRZRmANe072ST7TzaZzzt7XPL89nJ514ZSUzKLwCjJTTYfoSgrLc43+U9p1haBJwAA4E9chUaw5janL/Ck9POmQRefwGA6nU97XmnQSQOXmvl0sKw52IcFYBSNw9ZyLZXVSZ3AaFbMzZHZRanS6/JIQ0u3dPW45FRNW7APCwAAhBGCTxFMSwIG72vq5+kpodOjAlNvVmGq6WWSlRpvRmi/faTeNCAHYC2Zw9byOEcMJbI4a1n4zIJUcdiiTdaTDsjY8nalKb8DAADwB8ruIpheYOpEm/6eT32yeGYWJXc4q7ZOp8mgGEyb084OoYnatc1dsv1ArZk8OLswVeZPzwj2IQF+p0MSdLjBieo2E3hac04uJXc4K814GpwN7fH0SWNbj6QmhU7vqkNlzXLwVJPExETL8jnZkpcROkM9AAAIdwSfItzMghTzAYxFcrzDlN1p4MYrI4Sa6mopyfPbysXp6g+gbT1QKwlxdlNSCIQT7dejZbL6AYyF9nwc/jOkfQpDRUVdu7y2t9r3WPucXb9+Bhl/AABYBMEnIIDTg7YdrBVnr1tm5KfIgjDIsNGpWRcuL5TX99aYAI6WaYTSxL7m9h5f4MmrrrmL4BOAcdEpgvtONJoAzZJZmZKfmSihrjAr0QQr9x5vlOioKJM5FEoT+zSrdTCX2yNNrT0EnwAAsAiCT0AAaNPW57aVmSlT3tI0DdyUFqRKqNObrBsumCmhKCXBYXpWaTmJVyjt7AMIvvrmLtMPqa+vfx15fnuXXLtuhqSEwfTPc6ZnmI9QlD5sLdfAYHIY/JsAABAuaAABjEKnuWnw6G9bT5kd7on0RvIGnrxqG4fuymLqJcbbZcOSAkmItZnyQc1GKy2k7BSIJEcrW+SZN06ZsizNUB2vmuYuX+BJ6eTPBibFBt30/BRZPDPT9DaLd9hk/eJ8SSX4BACAZZD5BAyjTXqffXOgL1B1Y6dctqpEcsfRuDQhzia26GhxeQZ6I4XDrng4KMlNNh8AIk95Xbu8vKtqyEbD9efPNJmpYzU8oKGTYpMTKO2ygmVzss0HAACwHjKf4Nd+Oi/urDAZQ6dqxp8tZBWNrd1D+gLp/nZVY+e4vkecwyYXLC0wU6a0d4Y2dZ8/PT0ARwsAgadruq7tL+6oMGt9qKpuGLqWdzvd4/77FGUnyZLSLImJjhJbTLSsnJ8rWalMigUAADgbMp/gFz1Ot/xta5l0OftLzSrrO+TSVSUhOeY4KcFudrIHj5yeyK52cU6SvHvTbFOeob0nACAU1TR2mqCTd02saeocd7aQVQxfy3Vt1nLc8Vo6O8s0GmdtBwAAGBsyn+AXjW3dvsCT6jsdgApF2oB69YJcs6utNxZzi9NkZv7E+wKN5ebE3NztrJC/v105oR4kABAoupb3DcsWCtUeR7OL02RWYarZYNDS6HUL8yRpAsEnNdbA06GyZtn8Vrm8sa9GenqHTtsEAACIFGQ+wS8S4kZmCyXGhe6P19ySdJlTnCbaU1anowW6zO9v28p8E9j0Rk8nJ2nfKAAItuGZQVEhvL5rGfS6Rfmy5pxc83mgM5cOnmqS1/fV+B5r0O7KNdMC+poAAABWROYT/EIbsK6a338xrzRTSHeYQ5nelAQ68KQqGzp8gSelO+N1zWefjNfr8sj2g7UmU+pIeUvAjxFA5JpVlOrL/tR1UXscpQ4bax9qYqKjp6Rkrqy2fchjXdu1TP1sWjud8tqeanlpV6XUNo2v3yAAAIBVhebWJSxp3rR0mV2cKjrgTUcdY2wSYkeWfMS/Q1aB9l/RoJU6XtUqLrfHnH8A8DfdVFi/pEDWnJMn0dH9gRuMTWLc0PVdG5Sf7f1RA1PPvHFKunr6y9hPVLfJVWumSUZKXMCPFQAAIJC4goRf6U0JgafxmZGfbHqQKN2HX1yaKTlp8WfNevIGnrxODdtdHy8t9Xtld5V
sO1D7jrvyACKTru0EnsbfmDzzdOBIA08XLCk4a0ZtbXOXL/CkNCu2vG7i67sOvNDSP13f955oFI/WkgMAAARB2GQ+Pf300/KjH/1Ient75dprr5WPf/zjwT4kYEyiTvcgWTE3R7QKxGE/+wQpW0yU2GOipdft8T0XP4mpU1UNHWaEuveWRANbV62dxk0mAExSfKzNrKddPW5x2KNNAOqsXz/KWq7fY6LePtIgbx+t9z1u63CaDDYAAICpFhZ3l3V1dfLv//7v8pvf/EaefPJJ2bZtm7z00kvBPixgXHRs+TsFnrzBqvMX55tJTd5+W8vnZk/4dbWsY/BeuE7ba2l3ihWU17bL6/uqZfexBnFrPScAhBhds3WAxDsFnlRWWrwsnpnpe1ySmyylpzNjJ0LLsoev91agGVkHTjbJ63ur5WglfQsBAIgEYZH59Morr8iaNWskIyPDPL7++uvlqaeekvXr1wf70ICA0BuSWzYlmJHn2lNkMo3R44YFvKJOB8KCTW+atKG6V01jp1y8ojioxwQAgbZsTrbp4aclcsN7Ro1XnCNGWgf1LI8dwwbHVHhjf40cPNVsPj9Y1mxKDRfOGAi6AQCA8BMWwafa2lrJycnxPdbPa2oGRhufjd60T8VEs0CKOb2b6v01kkXSubDZoiX+LDcmYz0XS2ZnSXVTp9Q3d5vI08p5OZaYZHWyps03PVFVNXSK29M3ocBYJP1cjAXnA7C2yZTaDbZ6Qa48t61cupwu07Nr7UJrlNwdq2gd8ZjgEwAA4S0sgk+eUcpxxjpCOSMjcUrGLU+FlJQzN6mONJyL8Z2L91x5jjS395hd8cT4ye20+0t6arzUaEDstJjoKMnKShpT6cqZ8HMxFOcDCG86Je+GC2ZKe1evJMbZxlTaPRXs9qF9C61yXAAAIHDCIviUl5cnW7duHZIJpc+NRWNjR1hkPulNZGtrl7gHXcxFIs7FxM+F/i9wut3i7LZGv6c5hSlyrLxZ2jt7TSP2lYvypa21a0Lfi5+L0Dof6emJwT4EIGxoxlN6cvCzWQdbe06ebNlZKS63x5QGasYtAAAIb2ERfFq7dq384Ac/kPr6eklNTZU///nP8g//8A9j+rM6xlg/woHeRLpc1ruRDAbOReifi3iHTa5eO11aOnokIdYmCXH2Sf89QvVcBArnA0AwFGUnyY0XzJSO7l5JSXCQ+QQAQAQIi+BTbm6ufP7zn5c77rhDnE6nbNq0SS655JJgHxYAP+zYZ6VSGgYA4djXyl+9rQAAgPWFzbv+FVdcYT4AAAAAAABgHVF9fX3hUXMGAAAAAAAAy2HONgAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAAKG4BMAAAAAAAAChuATAAAAAAAAAobgEwAAAAAAAALGFrhvjUBzu93S2Ngo0dHRkpaWJjExMRKpqqurzYeei9zcXPMBAACA8PfKK6/IU0895bsWzMvLkw0bNsjFF18c7EMDAJwW1dfX1+d9gNCgAaf7779fXnzxRUlOThb9J+zs7JRVq1bJfffdZ95wI8WpU6fk7rvvloaGBsnOzjbP1dXVSWJionzzm9+UuXPnSqTxeDzy1ltvSU1NjURFRZmfh0WLFondbpdIw7kYwLkIjX+jRx99VJ588knz7zT4Bur2228Xmy2y9ou6u7vlJz/5ibmhHH4+PvnJT0pSUpJEGjadBnAuBkT6ufjxj38s27Ztk+uuu863+VhbWyt//vOfZf78+fK5z31OIglr50ick5G45hiJn5PAv88QfApBH/jAB+Saa66R66+/3vcPrz8Q+ib72GOPyf/8z/9IpLj55pvlrrvukjVr1gx5/o033pB///d/N+cjkuzatUs+//nPS35+/pBgnAbpHnjgAVm9erVECs7FAM5FaPjqV78qXV1dctNNN0lOTo7vBuoPf/iDuUh88MEHJZJ86lO
fktLSUrPOe8+H/tzqxfKePXvkpz/9qUQKNp0GcC4GcC76XXbZZeZmcfjNUG9vr7lefuaZZySSsHaOxDkZiWuOkfg5mYL3GQ0+IbRceeWVZ/y9q666qi+SnO1cXH311X2RRv/OR44cGfG8Phdp54NzMYBzERouu+yyM/7eFVdc0RdpzvZ3jrT3uve///19jz76aJ/L5fI9p5//4Q9/6Lv99tv7IgnnYgDnYmA9aGpqGvF8Q0NDRL7HsXaOxDkZiWuOkfg5Cfz7DA3HQ1BmZqb86U9/MtlOXhqhfvzxx31ZDZFi2rRpJt26qqrKnAP90DRJfW7GjBkSafTvrxH74fS5SEty5FwM4FyEhri4OJONNtzx48clNjZWIo3usGkpzXD6XKSlvuvO67ve9a4hmR36+Q033CDNzc0SSTgXAzgX/f75n/9ZbrzxRvnCF74g3//+983HF7/4RZO98PGPf1wiDWvnSJyTkbjmGImfk8C/z0ReMWcY+Na3viVf+9rX5Otf/7r5j6A3j+3t7Sb9TUtoIomW1v3gBz+Q973vfSbopDfZmv63ceNGkyIYaZYuXWouuHRBGNz3QFNo9fciCediAOciNOiNk5ZVFxcX+/6d9E3/xIkTZt2PNPoepz399P1t8PnQC+Zvf/vbEombTldffbXvAlDf77TcPtI2nTgXAzgX/a644gpZuXKlaTqu14J6XayPtYTGWzoT6WunvufHx8fLd77zHYlEvJ+MxDXHSPycBP59hp5PIczlcpmIo/4AZGRkRGRjOIz8mdCeX1qXOzwYp80DHQ6HRArOxQDOReh4/vnnJSUlxdwolJeXy6uvvipXXXWV2cHXRvGR5rXXXjPvbdoY/+WXX5atW7eam8oPf/jDEbUzq/9vddNJ+xkO33T6yle+EjG9fUY7F0rPhf5ccC5E2traIvLnAiPf8w8ePCgzZ86UFStWmJ8F7fv4ve99Tz796U9LpNJpiN7rIO3jE4kb1YNxzTES1x2Bvf4g+BSCvAvm8JGyeiN52223RVQQyjuV4Omnnx4xXjeSpxJoSaZOANQIdSROvhmMczGAc2FtehG8f/9++e53vyu//e1vZffu3XLRRReZix+9adDGjpGW2aqp7vpzW1hYaNZ3zd7bvHmzaST8b//2bxJp2HQawLkYwLnA8IwWXTd14rMGoW699VZz46x0Df3jH/8okeZf/uVfRjyn7yWbNm0yn0da5YjimmMkrjsC/z7Du1OIpgTqdAKtYx8+neBLX/pSRE0nuOeee0zfml//+tcjphLoaN1Im0rgnUiwZcsWX+Cto6Mj4ibfKM7FAM5FaNCSkb/85S8mKPjCCy/I73//e5OV9u53v9ukO0eal156yaR66wWfbijoY92JvOCCC8xI9UjCptMAzez54Q9/aHo9XnzxxUN+FvSmOxJvDvTfPysry3yupWba8wiRS4MI+l6i9P/H+9//fklMTDSZzpGac6Abbtob96Mf/ajJ9FGvv/66uQ6KVFxzjMR1R+CvPyLnaiXM0gGHj40tKSkxabVXXnmlRJLDhw+PuMjS/xAamIvEhfOzn/2sGSuskXtvVotG77UuV4NxugMWKTgXAzgXoUF7CmhmmgbStY5eA4R6IagjbSMpuOClN0ktLS0mw1U3XDTNOz093XzudDolkrDpJEMCTFpKpO/xP/vZz2T79u2mJEDpLn4kee973zuiNEbHgWsfTKUbc4hMPT09pkRI+7XoRqzeJGoPm0gtpdLNag0gaNmhXhOtXr1afvWrX5mslkjFNcdIXHcE/vojMn+ywmQ6gQacIn06gXcqgQbeBovUqQTeiQSDeScS/PznP5dIwrkYwLkIDfrGftNNN5l+C3pzrbvUa9euNbuT3pKJSHLnnXfK5Zdfbi4GP//5z8sHP/hBWbNmjSkJ0PMUSdh0GqDXP5r5pPRm8kMf+pBpoqw3lJGW1aH/Px5++GHTw0dLRPTvr9mskTjhDQN08p/27Pnyl79s1oiioiI
TqNU1VQMMkUrfT+fPn2/61GgPzMFTwyMR1xwjcd0R+OsPgk8hiOkEA5hKMBSTbwZwLgZwLkKD9p6YPXu2PPfcc3Ly5Ek599xzTXmAZqwtXrxYIo2muF922WXmBkFLRrThp14A6g72unXrJJKw6TRAbwq074SW0ej0rh/96Ecmq0PLziItq0NvFjWDQ2+mNdhw/fXXm/8rkVxKBJE77rjDvH+kpqb6ntOgi77na7ZPJNN1QysmtMRMG7JHMq45RuK6I/DXHzQcD1FMJxjAVIIBTEQawESkAUxEAkKbvq/de++9Qzad9P1fbxh002l49m840x4lurn01a9+1fTkUBUVFWaHWq+HtOws0mg5iGZ/VVZWytGjR+XJJ58M9iEBAMLAVj9ffxB8CkFMJxjAVILRMflmAOdiAOcCCO0Aw65du8xFn/4f1qCx7k5rj45IowF1fb8fnLmp50czPHUTLlJpuYwGniL12gcAYO3rD4JPIUjrK73TCTQ90DudQAMwWlbz9NNPS6TQJsqjTSXQH2s9N5piHEmYiDSAiUhnx0QkIHRoRsvZFBQUSKTgXAzgXAAAQul9JnLuRMMI0wkGMJVgKCYiDWAi0gAmIgGh7SMf+Yjp66jr+vA9Q/2/raX4kYJzcfZzoedAP4+0cwEAsP57LplPIUhLyv71X//V9HjSbCcttxs8nUCnXEQKzXrSDBb9Mf7kJz9pgizeqQTXXnutmYITSXRCw/CJBIMz5jQjKlJoppP+fCgNyOnPgjZT1IlI2pT18ccfl0ih5bmjTUT6xje+YX6f5rSAtenGyj/8wz+Y935dxyIZ52IA5wIAEErvM9F+OSpM+XQCvZnUCKRm9wyeThBJgSdvgGHLli3ywgsvyHve8x554IEHzGQvnUoQaYGnwRMJhovkiUjKOxHp2WefNVk+kTgR6ec//7kpydT0WZ2O5J2IROAJsD4dFKCZrY899phEOs7FAM4FACCU3mfIfALCCBORBjARaSQmIgEAAAAIBoJPQJhhItIAJiKNjolIAAAAAKYSwScgjDD5ZgDnYgDnAgAAAEAwEXwCwsg111zDFKDTOBdnPxdMRAIAAAAwVQg+AWGEyTcDOBcDOBcAAAAAgolpd0AYYfLNAM7FAM4F8M5cLpf86le/MlNjly1bJmvWrJF//Md/lNdff12s5I033pC5c+eawQmT9Yc//MF8LwCw0lRvXZe8HwsXLpSNGzfKV77yFWlsbBQrOXbsmHzmM5+RtWvXmuPUY9dBN/X19b6v+eEPf2iet5o///nPcsstt8jSpUvNe9673vUu+b//+78hX9PU1CS///3vJ/1ahw8flhdffHHS3wehj8wnAAAQ0Xp6euSOO+6Qqqoq+eQnP2kuxLu7u03A9n/+53/k3//93035qhXo0ISWlhbJyMiQmJiYSX0v/Tu2tbUNGcoAAMGkgZrLLrvMBP+969ShQ4fM1GYdIvO73/1OkpOTg32YJsB09dVXy4UXXijvec97JDU1VY4fP27eL3QzQ4fb6LCfjo4O8x6ja7ZVPProo3L//ffLF7/4RZMRr+EAHUbzzW9+Uz760Y/Kxz/+cfN1//Iv/2I2On7zm99M+t/0hhtukE984hN++hsgVNmCfQAAAADB9P3vf18OHjwoTzzxhOTn5/ue1wtzLVv9xje+YS6eExMTJdj0ZsZfwaK4uDjzAQBWkpCQMGSdKy4ulvnz58tVV10l//Vf/2WyjYLtmWeeMUEmnRys/TNVUVGRGeJy5ZVXyksvvSQXXXSRed+wwnvHYP/7v/9rMp1uuukm33MzZ840U6J//etf+4JP5KjA3yi7AwAAEau3t9dkOGm53eDAk9enP/1p+c///E8TpGlubjYlFRs2bJDFixfLrbfeasrgBpdXfOADH5Af/ehHct5555kMqi9/+csmo+ojH/mILFmyRC655JIh5Qca1Hr44Yflwx/+sPl9ffzcc8+ZD93915KID37wg9LQ0DBq2d2uXbvkttt
uM6+1cuVKs7M8eMLl448/bm7YFi1aJOvXrze73Zo9NVrZ3Vj/fnq8F1xwgfmeuuN/9OhRv/+7AMBgGtTR9fPJJ580jzUbStdVXfe05E0DPb/4xS/M72l5nj6n699g3/72t03QZSxr5zvRgJNmNb355ptDni8tLTXHqKXbw8vudN3WNfevf/2r3Hzzzb5SPc3mGl4Sd+2115p1WP9eWhLupdmq9913n/n+mrX0vve9T3bv3j2uc6kZZDt27DBZtIPp+5D3WO6991754x//KFu3bvW9T+jXf+lLXzLvJeecc44pN9THXV1d5vf1/WLBggXmPWL16tXmfVVLJisqKsz74nvf+95xHSfCD8EnAAAQscrKykzQZfny5aP+fm5urrkBUFoGsm3bNlP+oYGbOXPmmMCQ3sR46e9r6cVvf/tbc1GuF/K6u3zFFVeYP6M3JnpRP3hH+Sc/+YnZKf/LX/4i8+bNk7vvvlt++tOfmtfRX/XGQgNgw7ndbt/Nl96s/Pd//7e5efrCF75gfv/AgQPmGPSmSm92dIdeS0E0c2C07zXWv9/27dvNzYXunmtQTANWABBouibpmq0ZqbpepaWlmT5FmrV6+eWXm7Kx/fv3mxI3DXoMDj55PB6zTmpA5J3WzrHQoL5uWGhA5frrr5cHH3zQbBrosc2aNeus2U4PPPCAKW97+umnzXHqQBj9e6mnnnpK7rnnHrnuuuvMsX32s5+V//iP/zBrsr5v3HnnneZrf/azn8kjjzxiNih0qMy+ffvGfOwf+tCHzNfrJoIGnHQ913VeyxlnzJjhy/zV9y0Nzr388svmOX3v0j+ngSR9T9GyPD3Hg4Nnem63bNlintPNDj3uvLw88++lgThENsruAABAxPLu/Gq/jrPRi++9e/eaAJHeACkNumhg6Oc//7kp3fPe4Ojz2uhfL+I1kKM71HpzovQm4YUXXpC6ujrJyckxz+nNh/f3tQHs888/b8pKvEEvzaLShq3D6U2ONoTV71NYWGhKU773ve/5sqR0l1135/X3NGtAP/RY9dgm+vfTMhPtaeI9X5odpX9HAAi0lJQU37qtGT+33367L8ij/fo0sK4l1FqipxlOH/vYx0wpmW4ivPbaayYjSvs0vdPaORYa+NLAyi9/+Uv529/+Zn7VD82S1YDOP//zP5/xz2oGqWY0KV3rdbPi7bffNsehWU66GaGBfzV9+nSTYaXfVwdg7Ny50/yqr680OPXWW2+ZcjkNgI2FBuo0IKR/Rns9abDI+1q6SaEZVRqI0te02+2+Esh169aZgJ03E0rLDLUvomahDaaBJv1eXtqfUEspvceMyEXwCbAI3d3wLuB6s6C115qSqxf2Y6FvgLoDoW+6AICx8TaB1eyns9G1WS/GvYEZpWv1ihUrfLvCKjMzc0hwRy+4S0pKfI+9PZa8pW9q2rRpvs/j4+PNr8P/zGg3RRoA0h1snWb5gx/8wAS5tGROd6uVlkborrVmXulNgt446A2PlnpM9O+XlZU1JFCnf0ZLFwEg0LTkTGkQQ0vmNONJM3FOnTplMj29GwBKs3p0PdZsTw0GaQmZrn/e9etsa+dY6XFo8Eg/amtrTYBLp8Pp90xPTzfHOBrNgPXyNk/3rqO6FmtW1WC6KaE0uKbZT9rkfDB9P9Gm5uOhGVP6oedLz50GoPQ+RDOrnn32WXPuhtO/z+bNm825PHHihBw5csRscug9y2CDA0/AYJTdARaZOqEjZPXNRRd07T+iu+Da5FZTWwEAgaE7zRpQ0Z3j0Wg/I93FPdOFvd4I2GwDe3m6Szxaf42zGfznvbwNbN/J5z73OXMzoL2p9Fj0Zkp3/PVmJDY21uxs6/vKu9/9bnOzoKUeo5WWnKmx7PC/nzY8B4Bg0OxMDWx0dnaankga6NGsJg2K6Do3mGbb6LW0ZnPq12tJnE5cG8vaORZaqqYlcl6aRaWlcrrmataqN5toNKOto941eLT3Ay8NFOnmhpa6Df7Q49CA11h
UV1ebrFb91fv+pH2a/umf/smUH47Wx8r72lqqqPcmeoyanaWlf6OVrOt7DzAagk+AxaZOaJmG7iBoDbmm5eqbGAAgMPTCW9dezR7VxuDD6U6zlp5ptpDuug8uL9CbBe1/pP09guHYsWNm40J3qLWcT28+9Hg1YObdydYNDL2x0J1/fT/R0pTBN0xeWkZhtb8fAHhpsERLkq+55hqT8aTZqv/v//0/U1qnjci9JdSDA+l6ba1r2m9+8xuTYXT++eePae0cC+2R9NBDD5lS5OHvKRogGi1zaCw0K2p4A3HtEaVrt2amasmgZklpxqz3Q3sC6rkZCw18adBOKy7OVNaoGzLDN0G0l9bf//53U4KtgTsN/mmGrmadMRUPY0XwCbCAsUyd0EaImtarkyV0woSmE2ufDW968XC6c6O/7y270KyqwaUT2hBQf1/TjPWmSuu/9U0cACKNZgPpbrrunususl5M642Ft5mq7ojrWqp9RO666y4z/UdvUr72ta+ZG5v3v//9QTluLevQqUo6UU+PRxud6+6/lpXoJoZmYf34xz82u9naoHbPnj1m0p6+JwynN2VW+/sBiEyaqaR98fRD1y7NWtIyOS0fvuOOO0y/Ip2w9swzz5jrY72+1d5HanDmkm7oamaODnXQrCTNhhrL2jkW2tNJS860N5O+vk5002t57bukfZn0OCdCr/11g0ADZvpepJlben2uU/G870N6P6B9n06ePGkCU95hFmMtNddzqUGk7373uyaopOdYexF+/OMfN1PqtNzaWzau5YT6+xqQ0ownbZKujzVApllj+m/0Ttli2pdLM2/r6+sndE4QPuj5BFiAvgnoG4kGlLyLvtaf6xhr7y6EpsNqwz9tZqiLuO5w6BuO3kRcfPHFI76n3jTpG6pOyNCUZH1T0Rss3QXX5raabaVv2vrG4/19nbYxe/Zs35sOAEQC7bOkvS50TLfuIOvNjPZZ0owhvQHwron6+zpNSS/Q9WJbA/ca2NG+GcGgN1B6vDo+XDcYdFNBj0XfJ3TnXRuV67QhPW5d6/XvpBsOOrFoOL0ps9rfD0Bk0rVIP5QG0XWqnJZ5aQm0XgPrhqmW4GmgRzOBtGm49knVa2MNimg2k5dOt9Oy6sEld++0do6FBoE0g0gDW3rNrQ3M9dhWrVplJvDp9fREaJBJA/96fLoe699Nv793KIWeF9081sCPBuA06KTX9ro5PVb6Z3XDRaflabPz7u5uM5BCe15paZ2Xvqb2f9Im7dpUXc+3TqzTP6P3JHo/oVUaWr54NlrNoX8XHZwxWsYVIkdUH3lygCXoLol36oS38a136oRmOmmwSN8U9A3YS5vH6k697r4MbjiuOyGXXnqp2bHXN0cvHd2qN1V6M6U3JLprolOMvBOXXn31VVN6MdFUYQAAAMAqNFii17dk9wPBR+YTYBHvNHXiPe95j8lU0lIQDS5pkEnTV0cru9PJH2r4lA2tEfdmUul4Wk1j1l1wDVBpIEunaxB4AgAAQCjTfnVaTqcbu5pJBCD4CD4BFmigqNMiNM1Va9i9Uyf0Q8vpNNVVG/xptpKmxWqqsaYO6yQNDSCNxpvQqGmxmgI82tQlzarSFFrt7aHZVtoHRFN8tZRvcGoyAAAAEEq0nYRu4mrTca0cGCvtt/TFL37xrF+j/Zy0AbjVaHuNN95446xfo5US2gsLCAaCT0CQeadOaDmdNhkczJulpE0MtbZdg0TeCRRamtfQ0DDqhAlvnbk2AdQglpf2/NDg06c+9SmzE6RZTprtpFlPd999t3kz1Tddgk8AAAAIVTqRTT/GSysCtG3F2Xivz63mq1/9qtmoPhvt7QQEC8EnIMgGT53o6OgwmU3a7PDIkSOmiaE2INdG5Fqzrk36LrvsMjMO/Dvf+Y4poxttwoQGny688EIzRlYneehjLdnTDCvNbFKNjY1mCpI2oJ03b54ZO6sTL973vvcF4SwAAAAAwaUVA8OrBkKFDhACrIyG44BF6C6
LTp3QsdbDp07oqFOdOKQfGjTSNxed+qEjXrXv069+9ashDceVTsDQTCfNZGppaZGSkhIzJUTTj5XL5fL9vmZI6dQKnWqhU468o2gBAAAAAJgsgk8AAAAAAAAImP7OwwAAAAAAAEAAEHwCAAAAAABAwBB8AgAAAAAAQMAQfAIAAAAAAEDAEHwCAAAAAABAwBB8AgAAAAAAQMAQfAIAAAAAAEDAEHwCAAAAAABAwBB8AgAAAAAAQMAQfAIAAAAAAEDAEHwCAAAAAABAwBB8AgAAAAAAgATK/weC95hjwXyyugAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Scatter matrix (pairs plot)\n", + "numerical_cols = ['Sales', 'Commission', 'Days_Since_Start']\n", + "pd.plotting.scatter_matrix(df_sales[numerical_cols], figsize=(12, 8), alpha=0.6, diagonal='hist')\n", + "plt.suptitle('Scatter Matrix of Numerical Variables', y=0.95)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Area Plots\n", + "\n", + "Area plots show cumulative totals and proportions." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Percentage area plot:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Weekly sales by product (area plot)\n", + "df_sales['Week'] = df_sales['Date'].dt.isocalendar().week\n", + "weekly_product_sales = df_sales.groupby(['Week', 'Product'])['Sales'].sum().unstack(fill_value=0)\n", + "\n", + "weekly_product_sales.plot(kind='area', figsize=(12, 6), alpha=0.7)\n", + "plt.title('Weekly Sales by Product (Stacked Area)')\n", + "plt.xlabel('Week')\n", + "plt.ylabel('Sales ($)')\n", + "plt.legend(title='Product', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Percentage area plot\n", + "print(\"\\nPercentage area plot:\")\n", + "weekly_product_pct = weekly_product_sales.div(weekly_product_sales.sum(axis=1), axis=0)\n", + "weekly_product_pct.plot(kind='area', figsize=(12, 6), alpha=0.7)\n", + "plt.title('Weekly Sales Proportion by Product')\n", + "plt.xlabel('Week')\n", + "plt.ylabel('Proportion')\n", + "plt.legend(title='Product', bbox_to_anchor=(1.05, 1), loc='upper left')\n", + "plt.grid(True, alpha=0.3)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Pie Charts\n", + "\n", + "Show proportions of a whole." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Customized pie chart:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Basic pie chart\n", + "product_sales = df_sales.groupby('Product')['Sales'].sum()\n", + "\n", + "product_sales.plot(kind='pie', figsize=(8, 8), autopct='%1.1f%%', startangle=90)\n", + "plt.title('Sales Distribution by Product')\n", + "plt.ylabel('') # Remove y-label\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "# Customized pie chart\n", + "print(\"\\nCustomized pie chart:\")\n", + "colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen']\n", + "explode = (0.05, 0.05, 0.05, 0.05) # Slightly separate all slices\n", + "\n", + "plt.figure(figsize=(8, 8))\n", + "plt.pie(product_sales.values, labels=product_sales.index, autopct='%1.1f%%', \n", + " startangle=90, colors=colors, explode=explode, shadow=True)\n", + "plt.title('Sales Distribution by Product (Customized)')\n", + "plt.axis('equal')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Subplots and Multiple Visualizations\n", + "\n", + "Create dashboard-style layouts with multiple plots." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create a dashboard with multiple plots\n", + "fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n", + "\n", + "# Plot 1: Daily sales trend\n", + "daily_sales = df_sales.groupby('Date')['Sales'].sum()\n", + "daily_sales.plot(ax=axes[0, 0], color='blue')\n", + "axes[0, 0].set_title('Daily Sales Trend')\n", + "axes[0, 0].set_xlabel('Date')\n", + "axes[0, 0].set_ylabel('Sales ($)')\n", + "axes[0, 0].grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: Sales by product (bar chart)\n", + "product_sales = df_sales.groupby('Product')['Sales'].sum()\n", + "product_sales.plot(kind='bar', ax=axes[0, 1], color=['skyblue', 'lightcoral', 'lightgreen', 'gold'])\n", + "axes[0, 1].set_title('Total Sales by Product')\n", + "axes[0, 1].set_xlabel('Product')\n", + "axes[0, 1].set_ylabel('Sales ($)')\n", + "axes[0, 1].tick_params(axis='x', rotation=45)\n", + "axes[0, 1].grid(True, alpha=0.3, axis='y')\n", + "\n", + "# Plot 3: Sales distribution (histogram)\n", + "df_sales['Sales'].plot(kind='hist', bins=20, ax=axes[1, 0], alpha=0.7, color='green')\n", + "axes[1, 0].set_title('Sales Distribution')\n", + "axes[1, 0].set_xlabel('Sales ($)')\n", + "axes[1, 0].set_ylabel('Frequency')\n", + "axes[1, 0].grid(True, alpha=0.3, axis='y')\n", + "\n", + "# Plot 4: Sales by region (pie chart)\n", + "region_sales = df_sales.groupby('Region')['Sales'].sum()\n", + "axes[1, 1].pie(region_sales.values, labels=region_sales.index, autopct='%1.1f%%', startangle=90)\n", + "axes[1, 1].set_title('Sales by Region')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9. Advanced Plotting Techniques\n", + "\n", + "More sophisticated visualization techniques." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Hexbin plot for dense scatter data\n", + "# Create more data points for better hexbin visualization\n", + "large_df = pd.concat([df_sales] * 5, ignore_index=True)\n", + "large_df['Sales'] += np.random.normal(0, 50, len(large_df))\n", + "large_df['Commission'] = large_df['Sales'] * 0.1\n", + "\n", + "large_df.plot(kind='hexbin', x='Sales', y='Commission', gridsize=20, figsize=(10, 6), cmap='Blues')\n", + "plt.title('Sales vs Commission (Hexbin Plot)')\n", + "plt.xlabel('Sales ($)')\n", + "plt.ylabel('Commission ($)')\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10. Customization and Styling\n", + "\n", + "Make your plots look professional and publication-ready." + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKAAAAMPCAYAAADitK0JAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAfZtJREFUeJzt3QeYVOX5N+AHpIggduwNNXbUqBA1sUZFYyzYYsFYYi+o+Le3iMbeOxHU2Hs3miiWKKjEGruIBRsWFHvnu55XZ79dmoAcgd37vq65ZuacM2fOzs6e3fnt8z5vsxEjRowIAAAAAKhI86p2DAAAAABJAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFSqRbW7BwB+rjPPPDPOOuus8X5c586d45JLLpkox/DUU09Fp06dJsq+rr/++jjooIPq7r/wwgvj9fjPPvssrrrqqrjvvvti8ODB8eGHH0aLFi1ixhlnjMUWWyzWWWed6Nq1a0w11VQxsSy88MJ1t4899tjo1q1bTM7vkznnnDP69es3SY/njTfeiDXWWGO06/J7k9+zaaaZJjp06BCrr756dO/ePWaaaaaY3Iz8dfzjH/+ILl26xORg+PDh8dFHH8W88847qQ8FAH6SCigAYIzefPPN6NmzZ2y22WYxOcggbO21147jjz8+HnrooXj33Xfjm2++iS+++KIc67///e/Yd999Y4sttohhw4ZN6sNlDL777rv46quvSniYAeS5554bG220Ubz88suT+tCmCF9//XVcdNFFseaaa8ajjz46qQ8HAMaJCigAmMxlNUtWM9X38ccfx/PPP193f/75549ZZpmlwTaLLLLIz3re2267LQ488MDyYXdykJUeO+20UwktarJiJl+fzz//PF599dX49ttvy/Inn3wy9tprr7j00ksn4RFTX36f8jJixIjyfcrvY37PaoYOHRq77LJLed+1atVqkh7r5CyD1c033zxef/31SX0oADB
eBFAAMJnL4V4jD/l6+OGHY5tttqm7n8HMxB4W9uKLL0424VO68sor68Knli1bxnHHHRd/+MMfolmzZmXZW2+9VQKM2pC+gQMHliqp3/zmN5P0uPlBVjjtueeeDZa99NJL5XuWw9xShio5dDCHUDJ6n3zyifAJgCmSIXgAwBThf//7X4OKr/XWW68ufEpzzDFHHHrooQ0e89hjj/2ix8j4WWihheL//u//GizL6jUAoPFRAQUATcCQIUPimmuuiXvvvbdUCuUQqFlnnbU0U/7zn/8cCyywwBibbo+8bOQm3LfcckvceOON8dxzz5WmyBkKzTDDDKUh+FZbbRUrr7zyRPkavv/++7rb2SvopptuivXXX79BCLXccsvFhRde2CCUGlk+tk+fPvHf//63DPvKHlJt27aNueeeO9Zaa63YbrvtonXr1uN1bO+//37ZZ1bvvP3226VCKxtD//73vy/NtaeddtpRHpP9q/JYH3zwwfL9yZ5Ibdq0KceRr9m2225bGqv/nNfrsssui6uvvjpee+218jUuu+yypVqufkP50047rfRgqsmvIYfKjXysq6yySt33IJvI5/FNrBCqvk8//bTudg4BveGGG8rtrJTKoahZ+fbKK6/EdNNNF9tvv33ssMMOE/w+ry+H/l1xxRXlfZxf5xJLLBF/+ctfxvqYcWmo/1MN7LPK8Lrrrovbb7+9VB3mcNJszN6xY8fYcsstY9VVV617j9d/PWry+fMyMScdAIAqCKAAoJG79tpr4+ijjy6NuuvL/jt5yfX77LNP7LjjjuO972wG3rdv31GWZ2CRlwwCsol5hh4/V4YntZndson1/vvvH6effnoJeXKY3fLLL1+CnhVXXHGM+3jiiSdKwJQf8kfuqfXMM8+Uy7/+9a+4/PLLY+qppx6n43r88cdj1113bdCbKsOk2v4yEPn73/8eCy64YN36DBoymMq+ViOHLxmA5OWf//xn6WGVAcr4ylBtt912i3vuuafBMWWT9nwNjzjiiNJHKG288cZx3nnnld5MKZ83g5f68jWphU8ZrmXwN7EMGjSowf0xzYSX37sM+fJrq4V+WQn3c9/n+XVnsJMhan2PPPJIudRepyrkz0gGa/k+qS+HJObl/vvvL+/vM844Y6LO6ggAk4IheADQiGUAkcPSah/Ks5IiK2wyDGnevHldmHPSSSeV6eVrsppi5OqhXJaXmWeeudwfMGBAg/Apq56WWWaZBqFAOuuss+LLL7/82V/Ln/70p5htttkaLMuZ7y6++OISAGWVS85+l6FN/Sqamvw6c4a8WvjUokWLWHzxxcslK49qMgzIaphx8d5775WgpxY+5Wv6q1/9qlzq96bKbeoHI7169aoLn3K7/H7ka5eVLzXZ5+evf/1rTGiwkd/7DIsWXXTRBg3q83U46qij6oY05vshw7uarMQZ2R133FF3+3e/+93PqswaOXzKELO+/D6OTvbzyvApK8uyKmn66acvx/Jz3ucp79cPn/Kx8803X7nk7auuuiqqkIFeNsqvHz7l15TVaVndVXPXXXdF7969y+2siqpfvZby5y1/Ln/upAMAUDUBFAA0Ujn86PDDD6+rbMkQIsOZ/ECbAUsOnasfFp188sllSFrKoTwjV7nksrzUhtRlf6VaELHCCiuUao1sFJ5hRVai1K+8qT/b2YRq165dqYCZa665Rrs+A4Y8pgx3son1Aw880GB9Do/KQCYDiaxuyuFTtcvdd99dPvzXr1AaFxnA5axkKR+f+8rXNS859C2HvaUcAld/6FRW89RkdVR+P/K1y9cwq8Wy2iWDljSh4V0O/crKpQxX8rWoP1Qs3xv5vDWbbLJJ3e0MROo3uc6Q7dFHH23QTHxC5NefVV95yTBxzTXXLH28MkSsWXrppccYQKWtt966fE0ZkuXXld/Pn/M+zxDoggsuqFs3zTTTlO/pnXfeWS4ZbrZv3z6qkO+5rJ6ryUkF8vtUG0JYv3l+HlMO1cv3RoZo9eW
y/Lk85JBDKjlOAJhYBFAA0EhlVUhWwtTkMKPskVST1SHZk6Ymg45xrfxJu+++e6mCyh5Gp5xySrRq1aoszyCgViVVM7qKpAmRx5zHeNhhh5XKpZ+qTKrfuDx7UmWokB/6b7311ga9eTKIqH/M43q8ub+aDFWy2qgmK1UyYKmpDR9M9SudMjzI71U+Z1bc9OjRowRUGbScc8454zwUcGRZPVW/ii17NtXvx5Xfu1pos/baazfoU1W/Cqr+8LsM2bIn0YTIoKk2rC2/Bxly1Z6/Vt2T/ahqFUsjy9cmv6c1s88++89+n+csfPUfm/2k6g/hzDBsQoamjoscDln//ZABYQZqtSAsh5jm8LxTTz219KbKij0AmJL5TQYAjdTTTz/d4MN79pIZWQ77yuAl++mkp556aryfJ6t8aqFCVs/kPkbubZTVSRNLBjJZCZOXDJr69+9fhmdl9Uj9MCErr7JHVP0Kl9rjcyr7HHqVAVVesqn1+B7vZ5991qB6J/so5WVMnn/++brbO++8cwnR0n333VcuGbxkQ+7sdfXb3/62DC+rhXrjKx+X1UQjy0Alq6xqfa9y6GBWseVrsu6669YNN8sAKsOPWk+omj/84Q8TfEz15fsxG71nddE888xTqqEywBtb2JbDL0fXH+rnvM/rV3ql0VVf1a9Empjq977KMHXk4K02PBQAGgsBFAA0UvWbYmdPmTF9uM8m17UP5hnMjKsc+pQVKznTWv2m3lm9sdRSS8WTTz4ZE1tWr+SxZpiQX08Ot9pggw3KJatpslonK0kyHKpV+WSYVGvgnCFVDtGrPySwFvzkfuu/Zj9lfKu66odym222WRlSmLPP1Yb7ZZVRDhPMSzZBz7Alq2A23HDDGF/5/a4/O2D95fXV70uVw/BqAVQeQ84WmAHRxBh+l/bYY4/Yc889J/jx2WNsYr/PR25YPrqAa3yG4OV7sP7rXmuY/lPvn/yZAYDGTgAFAI1U/cbTw4cPL+HN6D6cv/POO2MMKMbmxBNPjIsuuqjuubIXTVaL5JCnrAwaXSXKhMreTjkzWy1Y+tvf/lZmb6svP/jnULKBAwfWTUefIVkGFBlYZaVTVvVkKJDbZsPytdZaqwyVyyqurbbaKv773/+O8zGN/FpmuJJ9fMZVVhzlJauvsoorjzuH3r399ttl/QcffBAHHHBAGUaXTabHR/alqh+81YwcMNYfdpevQzZPrwViWfmUoU9t+F1+X5dccsmYVMYULP2c93mtR1f9x48sq+zGVb636leIjTzbYn0ZQI7teUcXaAHAlEwPKABopLKqp/4H2WzKPLpgJ4OOmtEN26q/j/oftLMvTU02QM7wJQOMrCiqPxRuYsjwI5sw11x77bV1wcjYgobsqVOrnMnjrVWkZC+kI444ojRPr4UQ4xM01EKM+uFHVg1ltUz9S/Yayj5R9Ydb5TFkdVF+P3J4YDZVz/Arq8my+XQ2Mq/1AkpZ1TW+MnyqDbWrL0Oumjz2kat76od6OQyv/ux3E1KJNTGNqQfSz3mf52x69eVQ0pFlFd2YjBx05bDG+rL5/JjUf+4MR+u/v2vDBH/961+XqrOePXuOsZF//Z9LAJicCaAAoJFaffXVG1Q0HXfccQ0qfDIUOfjgg+vuZxBTv2l2/RCk/lCnDH7ydvZYqnn44YfrPgi/9dZbcfzxx0/UHlAZlKyxxhoNAoWc+az+B/78AJ+z5NVv7pxBU60KqDbzWcoqn9rsdfm4M888s0FYkJVT4/oa1w+KMjyqH2bkcL9DDz20VFvlcLtaZVFWPmUT96wiO+OMMxo838hVS2Nqyv1TjjnmmAahRc6ylw27a1ZbbbVRHpMzH9a+7xmS1QKrPIaRZ0VsDO/z+eabrzQ/r+ndu3d5b9UPny688MJxHhZYP/yq9SAbk6y+q18BdeSRR9aFUDk0MB+bFVTPPvtsaWBfa5I/tp9LAJicGYIHAI1UDgXKD945jKtW4ZONu+eee+6yLod+1Q+Gsoo
pG1LX1L9dq4DJYUNbbrllCVRyfS3EyeqirLjJWdIy3Bm5901t6NzPsd9++5UeTrXeOTld/c033xzzzjtvqY4ZMmRIgyFmbdq0ib322qtBpUw+PuUwtxwimFUo2Yh65Kbp43q8Oewwj6HWSyj7T5111lnldXjuuefqQoF83XKGtbTOOuuUZuUZ8NRCj6zoymbc+bXl8lqYl8OvcvvxlSFWvh4ZdC2yyCLl66vfMD1fm9rx1Jff0wx0arP71Y4jZ4bLHkqN8X2eQyf32Wefcjtf/3x/ZyVfhkEZ4OVrkK/n6ELUHLaYz1ELjjJwzFkhMzDN63yfZXg3unAoA9Vsjp7N+9N1111XwtN8P+f3rv57MqsLa0P28r1Vf58ZVGW1WlZjZe8wAJhcqYACgEYsQ6OswKkNFcoP0xm4ZFVI7QN1VlRkNdHIPZVyJrb61ThZQZThSFY45fIcFlRfBhw5C16GT9lUOT8o14w8y9yEyEChb9++MfvsszeoMsnAK6tE6odP+dz5wTzDl5rtttuuwZC5DJnqz9g355xzjvfx5vC5s88+u0ET6drrUAsIct3555/foIIlq6EycKrJIC/7P+X3pRb6ZMiQoUqGFOMrZ5XLWfTye5zHUj98ysAkK6+y+md0Rn4fpG7dukVjfZ9nSJezEtbkY3M4Zb4H8nYGVLXv3ciymqp+Y/WsZMtKuAwUM3zK1y0b8o9Ofn+z+m2xxRarW5YVfTkcr374lIFgNnCvyffTcsstV3c/w6/8Hk+MnzEAqJIACgAaue7du8ett95aGnBnIJONp1u3bl0CiKz2yHXZg2hkGZBkUJIfoHP7/OCblSHZj6k2a1quX3755cu6/ICf4VCGAVnNUX/IVg47mxjyWLKvUgYJGZBloJSBSgYPs802W+nplJVSGQKsssoqDR6bFTxZNZU9dfJxWTWVQ7eywfcpp5xSgqSa/DD//PPPj9MxrbTSSqVX0vbbb1+qrDKUyNciA63NN988brzxxgaBQcoql6ycyoqpfP0yMMtQLyuT5p9//vLaZoiRodmEyOfPyqp8nTLgyO9fDhfLaqr83mRANSa117UmK2/qD39sbO/ztO+++5aQMJvo5/cvL/leytewfvgzpiq4fP9kUJg/B/m8Xbp0KT29jj322LE2Ee/QoUOZeTC/T8suu2x5bL4PskIrvw8nn3xynHPOOaMMu8vlOYww3ze5rvbe1w8KgMlZsxF+UwEA8KMchpbhR21YYVYM5ayDAAA/hwooAACKHDaYTbxr4dOUMPwOAJgyaEIOANDE5bDFrHzKoYf1Z85bYoklRhk+CAAwIQRQAABN3Icfflg3Q2D93k85qxsAwMRgCB4AQBOXzd1nmmmm0tA9m6d37do1LrvssgYztAEA/ByakAMAAABQKRVQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQADCOPv3001h44YXjhRdemNSHAgAAUxQBFACMQ/DUs2fP6NKlS7m/4YYbxi677BLDhg2r2+buu++OLbbYIpZbbrlYaaWVyvqXX365wX6effbZ2GGHHcp+ll9++dhmm23i0Ucf/cnnf+ihh8q+l1122VhhhRXKvl966aUG29x1110lHBv5ssEGG4x2nzvttFNZ37t37wl8VQAAYNwJoADgJ/z1r3+NW2+9Ndq3b1/ut23bNu655544+OCDy/2bb745dtttt3jsscfi+++/j+HDh5f1m2yySQwePLhsM3To0Nh2223jgQceiK+++iq++eabePjhh0sg9eqrr47xuZ9//vn4y1/+UvY9YsSIEoblvnNfH330Ud12taqsPLZZZ5217jLzzDOPss8LLrgg7rvvvon+OgEAwJgIoABgLL7++uu4/fbbo1W
rVnHjjTeWZWeeeWa0adMmhgwZEp999lldFVFWRv33v/+N+++/P2afffb4/PPP49JLLy3rMjTKYGrxxRcvFU0ZPuXtL774omw/JrfddlsJq1ZbbbV45JFHSoA122yzxfvvv9+geqoWQO2zzz5lf7VLnz596rZ54403Yt99940TTzyxstcLAABGp8VolwIARQZM3377baksmn766cuyueeeO5544olyOyueMkiaaaaZYtNNN43mzZvHjDPOGEsttVS8/fbb5VILsmqaNWvW4DnysWNSe1w+pva4rIQa+XG1AGreeecd47722GOPeO6558rxvvnmmw0qqAAAoEoCqPH01dffTOpDYDLXqmWL+Pqbbyf1YQATyTRt25VqpgyScrhcyp/x+r8Pjup1dN3tXJ4VS7WAas455yrLVl/j93HOuefGM888E51/7CX15RdfxHrrrRerrb7GKL9faueS9db7Y1x55ZXRr1+/0jcq952B2HbbbR+LLrZ4edyXX34Zr7/+enncSSedFHvuuWe0bz9ddNu4W+y88y4x1VRTlXXTtm8fO++yS+y4406x/h/XKwHUt99+53cbNFL+JgEmBucSfkrrVi1jihyC984775RGr3379i1DF2BKksUJP1QpTOojASamnj33KyHOwIEDy/1tum8dp59+WqmOGllWJx3d66jy+ywfs1G3bmX5LLPMEscde1y0aNGiBE95SR9/8knpCTWmc8mvFl44Djnk0LI8ny8rovI5PvrowxJEpUGDXiqVWOnFF18sz/vuu0PjvHPPjWOO/v/h2Pnn947dd9+jDCcEGjd/kwATg3MJE9NkFUBde+21seaaa8aAAQPi/PPPj65du5YhAuk///lPrL/++tGpU6dyPXLz1P79+5f/IueQh5xVKPty1HTv3r3066jvrbfeipVXXrn8N7v+sAgAGNlaa68d5/f+eyzfuXO5/95770WfCy6IHbbfLr777ru67TIEOvLII+KGG24o9//ylx1jwQUXLLefeebp6LF3j9K/6eZbbo1//fuuMhTu/vvuixNOOH6Mz33vvffGEUccHkssuWTcdXe/uPGmm2KOOeYsz9G37w/9naaZZprYYostY/PN/xT33nd/DHjo4dhpp53Luuuuu7ZuGGCtEgoAAJpsAJX/1T366KOjV69eZarp0047Leaff/449dRT47XXXit9K7p161aasW600Uax++67l2aqtTAp7+f6DLGy90bORlTrkTGyDz/8sARPc8wxR5x11ln+EwzAT+rcuXP06dO33N5yq62iZcuW8eyzz5YAKWUQdfBBB8UN119f7m+88Sax2+671z3+3HPPLVVPm22+ecw333wliNpu+x+G9PW7++4xPu8Zp59egq1tt90uOnToEB07LhCb/2nzBo/LZQcdfHAccuihMcMMM5Rlf95223Kdvwuff+65il4VAACYwgKol19+ucwElFVPKZu45lC8vJ/DGDbbbLMy5XQ2ft1uu+3Kf3ufeuqpsu0111wTSyyxRGy//fax0EILxbHHHlsqp3K2oJHlsL6dd945WrduXWYtylmMAGBMhg0bFv37PxgD+vevW9a9+zZlxrv06muvluuje/WK22+/rdzecsst4/AjjmjQbDz/mZLqL5tqqh9+DWcPpzF57cf9N3hc86kaPO6VwYPjzjvuKNXCNbXheWnaaaed4K8fAAAaVRPymWeeuVzX+mukHG5X0+XHhq3ZfDWnwc5hc7X1Tz75ZCy33HJ122aolMMasgFs7XG1P8Z79OhRpsG+/PLLo3379r/I1wbAlOvFF16IXXbeOaaeeur45z/vqFv+yaeflusOHWaNG264vgx1q1U+HXjQwaPsZ9555onXXn21VEitv/4GZVa9/AdKWmyxxcf4/PPMO2+8PGhQXHnF5bHSSiuV32U333zTD49b/IfHPdj/wTjh+OPL77V/XHJJqYi66MILy7pctuhii03U1wQAAKbYACqHw+X01TnULmcbymF
xWdWUf6DX/+/xOuusU4Y5ZHXUXHPNVdeLI4cl1JdTU2flVE0OQTj44IPj/vvvj8suu2ysU16PTXPd1xibH98epVHfpD4WYKJYfvnlYq655o433hgSO+ywfVl2+mmnxZ133lGGu/12pZVik002qdv+nnvuiQce+P+VSMsv3zmOO+640pPpoYceisGDB8caq6/2QzPyL78sfZn22GP38vslf29ttdWW5XHnnXdeLPSrX8Xuu+1WfuflP2hWWfl3ZThe/jMm/9my4192LI/bYP0N4vLLLi/H2G2jjcq6WoP0ffbZJ6Zt126MX1/+WvO7DRohf5MAE4NzCY0xgErZA2qVVVaJc845J84444y45JJL4uyzzy6NxVP2dsoeT48//nj5Y37eeeeNtddeuwzdG7mPU96v31z8qquuKn+0L7DAAqXv04UXXthgOMO4+n4MfaUgNasXeHqrQOMwVYuWccKJJ8TePfYu4VG6445/xgwzzhgnn3RyvPb66zF06P//h8ewYR80ePywD4eV3x2dlloq/nHJpXHuueeUIeSff/ZZ+f22+x57xnLLdy7bfPPttzF06NDyuKx0ynPJ79dcK3r//YL4+997l15OX3/zTSy77LKx9z77xvwdO5bHTdu+ffS9sG+ZcOPhhx4uM+Qtssgise1228e666471t9ducrvNmh8/E0CTAzOJUxMzUaMqVP3JJR/QOcQultuuaVMJ52Nx0d21FFHxUsvvVRCqj/84Q+x9dZbl+blNXvvvXcZ1nfooYeWWfCyUWxum3/Qb7755nH44Yc32H5cffX1Nz/766PxykyzVcuW5QPi5PeTBfwcWVH04AMPxH779Ywj//rX6Np1ndKPcGL74P33Y7XVVo1//etfMfsccziXABPE3yTAxOBcwrho3arllNWEPP/j269fv7r7OfRu1113jUGDBsWjjz4a//3vfxtsn5VMOZtdmnXWWeP9999vsD7vzzLLLHX3M6BabLHFSt+oDKROOOGE0qgcAMZF/l5aa+21y+0uXX5TSfiUDc8POeTgMgR9nnnmmej7BwCASWWyCaAyZMoG4Z/+2NQ1ffDBB6VHRvbMyEqm+sVazzzzTHTs2LHcziEM+fiaHJKXFU+1oXsp91O/Oir7dhxyyCEN9gkAP+Wp/z0dc845ZyX7zt5N2Vj8/N69J2iYOAAATK4mmwBq5ZVXLv9NzlAo/wP8yiuvlCqlrl27xsYbb1wajZ900knx6quvlibiN998c+y8887lsbn+sccei969e5dheQcddFBpUF5/Brz68nmOOOKIGDBgQFx55ZW/8FcKAGMOoPbaq4dZWgEAaHQmmwCqXbt2JUB644034oorrogTTzyxVDhlUDTbbLNFnz59ygxAG2ywQQmgTj/99NInKmXYlH2jrrvuujIT0UcffVSal4/tv8fZ7Hy99dYrz2MoHgAAAEATa0KeM+Bl9dKYKpgmJU3IGRtN+oCJwbkE+LmcR4CJwbmERtmEvL4MnqrqrwEAAADAL+v/d+aejEyOlU8AAAAATJjJsgIKAAAAgMZDAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFRKAAUAAABApQRQAAAAAFSqRbW7B4Cx27jbRjF48OBJfRiTnWbNmsWIESMm9WFMNjp27BjXXX/DpD4MAAAmkAAKgEkqw6cXBg2KtnPPOakPhcnUZ0PenNSHAADAzySAAmCSy/BptWsundSHwWTqnk23ntSHAADAz6QHFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkA
BAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAAAAUCkBFAAAAACVEkABAMAv6NNPP41OSy4RL7744qQ+FAD4xbT45Z4KAACadvDU66ij4t///le5v9mmm8TvVl45jjqqV8wwwwxl2T333BMXXdg3Bg0aFK1atYolllwy9tlnn+jYcYG6/Vx66SVxwvHHj7L/1VZbPU4/44wxPv+//vWv6H3+efHKK6/ETDPNFBtsuGHsvPMu0aLF//9I8Mgjj8SZZ5wezz33XEw77bSx9tprx1499o5pppmmbpt7+vWLHj32GmX/Cy+8cFxz7XU/4xUCoDFTAQUAAL+Avx1zdPzzn7fHtO3bl/tt27aN++69Nw477NBy/7Zbb40ee+0Zjz/+eHz//fcxfPjwsn7LLbYooVHNSz9WTrVv3z46dJi17jL9DNOP8blvu+222K/nvqXqKoOtd955J84/77w44ojD67b573//G7vsvFM8+eST0bJly/jggw/i8ssvjz332L0cT02tciuPv/7zZ6gFAGMigAIAgIp98803cccdd5Tw5+qrrynLTjn1tJi6TZt484034vPPP48L+lxQlv9x/fXjwf4D4q67+8Vss81W1l1x+eWjBEBHH/O3uOvuu+suf/3rUWN8/jPP/KEyascdd4oBDz0cJ59yarl/y803x7PPPFNun3P2WfHtt9/Gen/8Y/Qf8FBccullpTpq4MCBcffdd4/y/HvuuVeD5z/v/N4VvHIANBYCKAAAqNhnn31Wwp2sLJp++h8qleaaa6545JGBccONN8XUU08diy22WHTu0iW6dds4mjdvHjPOOGN06rRU2TYrllJWIr388svl9rzzzDNOzz1s2LB46803y+111l23XK+55prRsWPHcvuee+8p10//GER17do1mjVrFksttVT85jcrlGX3/rhNevGlHwKoeeadd6K8NgA0DXpAAQBAxTJ0mn322ePtt9+O3XbdZZT1GTgdc8zfRqmaeuqpJ8vtueeeu1y/9tpr8eWXX5bbBxx4QAx++eXo0KFDdO++TWyx5Zajfe4Mt2q+/uqruttZjZVe/XF439StW8eXX3wRX331db1tWpbr2hDAfO4hr79ebp9+2qmx7z57R/v200W3jbuVflJTTTXVBL5CADR2KqAAAOAX0LPnfiWgySFtaZvuW8fpp59WqqNGNmLEiDi611Gl8ikfs1G3bg36P6VBL71U1r3xxhtx7LF/iwsu+PtonzcbiM8733zl9kUXXRiffPJJaST+wgsvlGV5P2UFVrr8sstK1dSTTzwR/fv3b7DNoEEv1fWDyqF4+fzvvjs0zjv33Djm6KMn4qsFQGMjgAIAgF/AWmuvHef3/nss37lzuf/ee+9FnwsuiB223y6+++67uu0y4DnyyCPihhtuKPf/8pcdY8EFFyy3Z5111th0s81iu+22jwce7F96RW2wwYZl3d97946v6lU41bf33nuX6+xDtdKKK5RZ7Opmv2vWrFztseeeZdmjj/43Vl1l5ejefeu642oWzerCrC222DI23/xPce9995d+UjvttHNZd91115YKLwAYHQEUAAD8Qjp37hx9+vQtt7fcaqvSE+rZZ5+N+++7ryzLwOfggw6KG66/vtzfeONNYrfdd697/FJLLx2HHXZ47LPvviUMysBo6+5bl3VffPFFvPrq/58tr7411vh9nHTyKaWv07zzzhs77LBDrLrqamXddO2nK9dLLLFkaSSeAVkO+dt0003LpWwz3Q8z93XsuEAcdPDBccihh8YMM8xQlv15223rqraef+65il45AKZ0ekABAEDFckjb888/VyqJVlhxxbIs+zZ98/XXcc0118Srr71alh3dq1fcfvtt5faWW24ZBxx4UGkIXvPcc8+Vfkw5O96vf/3rsiybm9dMO+0PQdHorLTSSvG73/0u2rRp88Pzb71VuV5woR+qq9KSSy4ZZ5xxZrRt27bcP2D
//X/YZsGFyvUrgweXoXfTtG1b9jXq80/7s18rABonFVAAAFCxF194IXbZeecy9O2D99+vW/7Jp5+W6w4dZo0bbri+DGOrVT4deNDBDcKndMvNN8eBB+wfhxx8cLz77rsl/PnHxf8o67KyKRudj84uu+wcK/ymS5x88knl/n8HDoynn3667H/11dcoy3od9dfo0nn5OOCA/UslVs6295//3F/Wrb7GD9s82P/B+L//2y8Oygbog3+Yje+iCy8s1+3bt49Ff+wjBQAjazYia2UZZ199/c2kPgQmY/k3YquWLePrb74JP1kwbpZZeql446svYrVrLp3Uh8Jk6p5Nt465WreJx5/4YTYwmBLljHYbrP/H0jC8Y8eOMXjw4OjatWvceeedZYa8G2+6OTbbdJMYOnRo2X7GGWeMFi1+mIEude68fPzt2OPK47f40+YxfPjwMvwuh/Dl0LtsBn7GmWeVqqRsXL711luVrk1nnnVWLLLIonHbbbeV0KhWpVRrKp79nHJIXXr00UdLP6rsQdWuXbvSHD0/Kqyy6qpx5plnlW0+Hj48/vSnzctx5Mx9WU1Va6J++OFHxCY/DtkDGgefbxgXrX+cMfWnqIACAICKZVB0woknlkqnDJ9qDcGnn2GGOPnkU2LIkCF14VNtyF7OLle7DPvww7J8rrnmiosu/kesscYaMd1005WAKPs6nXPOuXVD4rJ66d2hQ8v+asPj/vCHP8TBBx9SqqSyUfk888wTPffbLw448MC651x22WXjhBNOjIUWWqgEZtnw/C877hgnnXRy3Tbtp5su+l54Yfxx/fVj5plnKdstssgicdzxJwifABgrFVDjSQUUY+M/BDD+VEDxU1RA0ZhktdCDDzwQ++3XM47861+ja9d1SjPxie2DD96P1VZdNW67/faYe+55Jvr+gabB5xvGhQooAACYzGRz77XWXrvc7tLlN5WET1k9lT2isqF4zmYHAJMDs+ABAMAv7Kn/PV3ZvrMv0+KLLx477bRTaTKuagGAyYEACgAAGpEMoPbq0aNu2AwATA4MwQMAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAAColgAIAAACgUgIoAAAAACrVotrdAwBAtTbutlEMHjx4Uh/GZKdZs2YxYsSISX0Yk5WOHTvGddffMKkPA6BJEkABADBFy/Dp+ZcGxTRzzD2pD4XJ2OdvDZnUhwDQpAmgAACY4mX4tOL5KlsYs/47bzSpDwGgSdMDCgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAKYgn376aXRacol48cUXJ/WhwDgTQAEAAMAUEjwdsP/+sfLvflvub7bpJrHnnnvEhx9+WLfNPffcE3/epnustOIKsdqqq5T1gwe/PMZ9XnP11SXM6rr2Wj/5/C+99FLs+Je/ROfllyvHcOSRR5Rjqu/SSy8p+xv50mOvveq2+eCDD+Lwww6NNX+/RnTuvHxssnG3uPEGM5k2dgIoAAAAmAL87Zij45//vD2mbd++3G/btm3cd++9cdhhh5b7t916a/TYa894/PHH4/vvv4/hw4eX9VtusUW88soro+xv6NChceqpp4zTc2fI9Zcdto+HH34omjVrVoKn66+7Lvbbr2eD7V76sSqrffv20aHDrHWX6WeYviwfMWJECaNuvPHGEkS1bNGyVHIdfvhhcdttt/3s14jJlwAKAAAAJnPffPNN3HHHHdGqVau4+upryrJTTj0tpm7TJt584434/PPP44I+F5Tlf1x//Xiw/4C46+5+Mdtss5V1V1x++Sj7POboo0epYBqTK6+8ooRQCyy4YNx73/1xxZVXRvPmzaP/gw/G//73v7rtasMCjz7mb3HX3XfXXf7616PK8ld
ffTWeeurJmHrqqeOWW2+LB/v3jz/9aYuy7q5//2sivFJMrgRQAAAAMJn77LPP4ttvv42WLVvG9NP/UE0011xzxSOPDIwbbrypBDqLLbZYdO7SJbp127iEQzPOOGN06rRU2fadd95psL877vhn3HvvPSXQGhcD+vcv12uttVa0adMmFl54kfJ89ddl1dXLL/8w3G/eeeYZ7X6++frrutt5jLWqqJTHS+PVYlIfAAAAADB2GTrNPvvs8fbbb8duu+4yyvoMc4455m+jVE1ltVGae+6565Z/9NFHcdyxx0br1q1jm23+HH//e++ffP7XXnutXM8666x1y2affY54+umn49XXXq3b5ssvvyy3DzjwgBj88svRoUOH6N59m9hiyy3L8l8tvHAs8+tfx+OPPRZ/WHedaNNmmvjkk49jkUUXjV123W0CXx2mBCqgAAAAYArQs+d+MdVUU8XAgQPL/W26bx2nn35aqY4aWVYVHd3rqFL5lI/ZqFu3unUnnHB8DBs2LHbeZZeYZwyVSiP75JNPyvXUU7epW9Z66tbl+tNPPm3Q/ykNeuml8rxvvPFGHHvs3+KCC/5et+74408ofaGyoivDp1pl1Mcf/3CbxkkABQAAAFOAtdZeO87v/fdYvnPncv+9996LPhdcEDtsv1189913ddvlULicoe6GH2eW+8tfdowFF1yw3H7ggQfi1ltuiYUWWij+/OdtJ+rxZXXUppttFtttt3088GD/0odqgw02LOv+3rt3fPXVVyVk2vEvO8SwYR/EeeefX7bZeONNytC9ffbeuxw7jZMACgAAAKYQnTt3jj59+pbbW261VekJ9eyzz8b9991XlmUQdfBBB8UN119f7me4s9vuu5fb2Yy811F/LcP1jjjyr+Wx46pdu3bl+quvfhhil2rD7dpN+8O6pZZeOg477PDYZ999Y5pppokWLVrE1t23Luu++OKLePXVV+KGG64vQ/VWXHHFWHHFlWLaaaeNvXr0KNvk+loPKRofPaAAAABgMpdD5p5//rloFs1ihRVXLMuyt1IOXbvmmmvq+jAd3atX3H77beX2lltuGQcceFA0a9as3H/mmadLD6m09VY/9GSqeeutt6LTkktEn759Y/nlf6iwqm+uuecuvaNqj09Dhw4t1/PNO1+5fu655+KVV14pM+/9+te/LstymF3NtNO2j9de/aGXVPx4TGmqH5uR1w+1aHxUQAEAAMBk7sUXXohddt45evTYKz54//265Z98+kP/peyplNVF1113bV3l04EHHVwXPqVWLVuV7epf2rdvX9Zlv6a8n9uMTpcfh/3deccdpZrpxRdfjGefeaYsqwVit9x8cxx4wP5xyMEHx7vvvlvCp39c/I+ybt555y1N1OeZ94eeUw8NGFAqt9Kll11arnMmvwUWWGCiv3ZMHia7CqhskHbiiSfG4osvHn/6059K2R4AAAA0Zcsut1zMNddcpan3DjtsX5adftqpceedd8YMM8xQhrRttukmddvfc0+/+M9//lN3v3Pn5eNvxx4Xd919d4P93nTjjXHYYYeW/k133PmvsuzJJ56Inj17RjT7YX27aaeNLbbcKq699tpS4bTaqqvE119/XYb75fMuueSSdUMCb7nl5njzzTei69prlSF+GVZluLX/AQeWMKzbRt3iissvL5VUf9p8szK079MfQ7Ttt99BBtCITVYVUPlmXnPNNWPAgAFx/vnnR9euXePNN98s65544okSSC2zzDKx9tprlxLD+tZff/1YeOGFG1wykU2rr756XP/j+NeaTFqzJPCggw4qswMAAADA5CrDnBNOPLFUKQ0ePLgsu+OOO2L6GWaIk08+JYYMGVI3JK42ZO/dd4fWXYZ9+OE4P9fX33z9w+OGDq37vNyhQ4foe+FFscIKK5RG4RkUbbjhhnHiSSfXPS4Dsosu/kesscYaMd1005XHLrXUUnHOOefG7373u7JN++mmi8suv6I0K8+KqGxMPu9888VBBx0
cu+y660R8xZjcNBsxmaQvOW3kSiutFEceeWT5wVluueXivPPOi1lmmSUOOOCAWHfddWOLLbaIbt26xTPPPFOCozPOOCNWXXXVkrpmMNWnT5+Yb74fxp6mTIGz6VkGUHvssUd5bHr99dfLvpZddtk49dRTSxo7rr76+ptKvn4ah6xubdWyZXz9zTcxefxkweRvmaWXije++iJWu+aH0msY2T2bbh1ztW4Tjz/x5KQ+FCbj88jrn30VK57/w2xPMDr9d94o5mnb2rmEKV5+dn7wgQdiv/16xpF//Wt07bpOJVVDTz31VPx5m+7x+OOPR7PmzX2+YYxat2o5ZVVAZaf7LM3LqqeUXfmz5C/v33XXXTHzzDPHvvvuWwKmP/zhDyVpveWWW8q2WYL4zTffRKdOnUpgVbtk+DSy999/P3bYYYdYZJFF4qSTThqv8AkAAAAmpbZt28Zaa69dbnfp8ptKwqchQ16Pvx1zdKy+xhrRunXrib5/mqbJpgdUBkxp4MCBdcsyUKoFTIsuuugoj6mNEx00aFAp3fupH4zcfscdd4yZZpopzjrrrGjVavTN1QAAAGBy9tT/nq5s361atY7VV18jtt1u28qeg6Znsgmg5phjjth0003LULkMkzIcWmKJJUq6m+NI81LzwQcfxG233RZ77rlnXfVUjofdeeed4+mnn475558/9t9//7oAK2WFVO77+eefLxVVbdq0maDjbF5vBgEYxY9vj2yu550C48pPC+Oimd/BjIX3BuPKuQTGxeyzzRa77LKLzzc0zgAqHX300bHKKqvEOeecU/o7XXLJJXH22WeXpmU1X375ZQmesmJq8803L8uyC//w4cNLgLXXXnvF1VdfHX/+85/j9ttvL2FWyv1ld/1a9dOxxx47Qcf4vYGvjEXtpJyt1bxVYFz5YWFcjPA7mLHw3mBcOZfA+PD5holpsukBVZOz4GXT8DPPPDOWX375OPjggxs0W8sqp1dffbXMklerYurVq1epavr9738fiy++eGlknhVTN910U91jsx9U375947DDDisz4t1///2T5OsDAAAAaGommwAqp4vs169f3f0cerfrrruW/k5Z3ZT9m7J5+EsvvRQXX3xxg9nuMlzK6qaaLA/s2LFjgykoc/jdnHPOGWuvvXYJqjKI+uSTT37BrxAAAACgaZpsAqhHH300evToUddYvNbrKcOlrHTKACmbkeewvIUWWqjBY7t3716G1dV8//338cILL5QQqqb+bHeHH354eZ4JHYYHAAAAwBQYQK288spl+shDDjkkhg0bVvo6nXDCCdG1a9e48cYb4+GHHy49otq3bx/vvfdeuXz00UflsTlk76KLLoq77747Bg8eHEcddVSpbtpoo41G+1yzzjpr9OzZM6677jpD8QAAAACaShPyHELXu3fvEh7deeedZQjeqquuGkcccUTss88+paop+z/V17lz51IRte2228ZXX31VAqr333+/NC2/8MILGwzLG9kWW2wRt9xySxmKd+utt8a00077C3yVAAAAAE3PZBNApQyOsiopZ6zr0qVLuaQ+ffqM9XHZ8ymniCzTRI5G/d5S9R9zxRVXTKQjBwAAAGCyH4JXXwZP2TAcAAAAgCnfZFUBVVOrfAIAAABgyjdZVkABAAAA0HgIoAAAAAColAAKAAAAgEoJoAAAAAColAAKAAAAgEoJoAAAAAColAAKAAAAgEoJoAAAAAColAAKAAAAgEoJoAAAAACoVItqdw8AAACTv427bRSDBw+e1Icx2WnWrFmMGDFiUh/GZKNjx45x3fU3TOrDmCIJoAAAAGjyMnx6adBLMcvcc0zqQ2Ey9d6Qtyb1IUzRBFAAAAAQUcKnw649Z1IfBpOpXpvsNqkPYYqmBxQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQA
AAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJA0WR8+umn0WnJJeLFF1+c1IcCAAAATYoAiiYRPB2w//6x8u9+W+5vtukmseeee8SHH35Yt80999wTf96me6y04gqx2qqrlPWDB7/cYD8fDx8ehx16aPx2pRWjc+flY5dddo5XBg8er2M5/bRTSwi2/XbbjnGba66+umzTde21xrjNgw8+WLbJCwAAAEzuBFA0en875uj45z9vj2nbty/327ZtG/fde28cdtih5f5tt94aPfbaMx5//PH4/vvvY/jw4WX9lltsEa+88krdfvbZZ++46aYb4/PPPy/3+z/4YOyww/bx8ccfj9NxPP/883HxxRePdZuhQ4fGqaeeMtZt8vl7HfXXcXpOAAAAmBwIoGjUvvnmm7jjjjuiVatWcfXV15Rlp5x6Wkzdpk28+cYbJcy5oM8FZfkf118/Huw/IO66u1/MNttsZd0Vl19e1v134MAYOHBgtGzZMq6/4cbo1++emHPOueL999+Pa6+99ieP49tvv40jDj+8XI/NMUcfXSq2xuaM00+Lt956azxeBQAAAJi0BFA0ap999lkJfTI4mn766cuyueaaKx55ZGDccONNMfXUU8diiy0Wnbt0iW7dNo7mzZvHjDPOGJ06LVW2feedd8p1/wH9y/XSSy8d8803X7Rr1y7WXGvNsmxA/x/Wjc0/Lr44nnvu2RKEjckdd/wz7r33nrFu8+QTT8SVV1451m0AAABgciOAolHL0Gn22WcvQdRuu+4yyvoMnI455m9xwQV9Ytlll62rmnrqqSfL7bnnnrtcv/bqa+V61llnrXvsHLPP8cO6114d6zG89tprce6555Rga5NNNhntNh999FEcd+yx0bp16/jzn0ffHyqP64gjDo8RI0bEjjvuNI6vAAAAAEx6AigavZ4994upppqqDKFL23TfOk4//bQSSo0sw52jex1VKp/yMRt161aWf/rpJ+V66qnb1G3beurWP64b85C53N+RRxwRX331Vex/wIHRfrrpRrvdCSccH8OGDYudd9kl5plnntFu0/v882Pw4MGx8cabxK+X/fV4vQYAAAAwKQmgaPTWWnvtOL/332P5zp3L/ffeey/6XHBB7LD9dvHdd9/VbZcNyI888oi44YYbyv2//GXHWHDBBX/Wc19zzdXx6KP/jd/+9nex7rrrjnabBx54IG695ZZYaKGFxlj99OKLL0afPhfEzDPPHHvvs8/POiYAAAD4pQmgaBI6d+4cffr0Lbe33Gqr0hPq2Wefjfvvu68syyDq4IMOihuuv77czyqj3Xbfve7xbdu1K9dffvVl3bIvv/jhdvaDGtOMdqedemq0adMmDjn0hxn3xjSjXQ4FPOLIv5bjGlke25FH/NDA/MADD4r2P87mBwAAAFOKFpP6AKBKOazt+eefi2bRLFZYccWyrHv3beKbr7+Oa665Jl79sX/T0b16xe2331Zub7nllnHAgQdFs2bN6vYz91w/9IJ65+23GwRMad555xvtcz80YEDd8Lx1uq7dYN1///vf6LTkEtGr19Hx9o/73HqrLRtskzPd1bZ5+umny7L99usZkZd6attssOGGE/gqAQAAQLUEUDRqL77wQuyy885ltrt//vOOuuWf/BgMdegwa9xww/Vx3XXX1lU+HXjQwaPsJ2fJu/DCvvHEE0/EK6+8ErPMMkvcdde/y7pasDWyrHzK/df32Weflt5TWek0www
zxlxzzz3KNl9++UV8/PHHpQfVTDPNPNptvvnm6/jwww/rvoZ8LgAAAJhcCaBo1JZdbrmYa6654o033ogddti+LDv9tFPjzjvvjBlmmCFWXHHF2GzT/z8z3T339Iv//Oc/dfc7d14+/nbscbHCCivE0ksvXQKojbttVAKkL774ImaaaabYZOONy7bZuHzrrbeKrJs686yzSu+pvNR3zjlnx3nnnhtLLbVU9L3worLsrrvvbrDNTTfeGIcddmiZce+OO/812m0GDnwkdth++9GuAwAAgMmNHlA0ahkUnXDiiaVKKGeQS3fccUdMP8MMcfLJp8SQIUPqhtLVhuy9++7QusuwH6uMskfTWWedHRtttFFMM800ZXa7DKUu6NO3bma77NX07tChZX/ZrwkAAAD4gQooGr0lllgybrr55njwgQdKD6Uj//rX6Np1nRIkpaf+90N/pZ+SQdNfj+pVLqMz55xzxj333hurrbpqTPdjKDWy3XbbvVzGJns5/VQ/p+WX7zzOxw0AAACTmgoomoS2bdvWDYfr0uU3deHTxJTVU4ccfHAsueSSMffcPzQtBwAAAFRA0cRUWTWUjcAXX3zx2GmnncoMeiNGVPZUAAAAMEURQMFEDKD26tEjWrVsGV9/882kPhwAAACYbBiCBwAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClBFAAAAAAVEoABQAAAEClWlS7exqzjbttFIMHD57UhzHZadasWYwYMWJSH8ZkpWPHjnHd9TdM6sMAAABgEhFAMcEyfHr5hRdiwemmm9SHMtlpNqkPYDIyaPjwSX0IAAAATGICKH6WDJ+e2XLLSX0YTMYWv/zyUA8GAADQtOkBBQAAAEClBFAAAAAAVEoABQAAAEDTCqDeeeed6NmzZ/Tt2zc+//zzSX04AAAAADSmAOraa6+NNddcMwYMGBDnn39+dO3aNd58882y7oknnog//elPscwyy8Taa68d11xzTYPH9u/fP9Zbb71YaqmlYptttokhQ4bUrevevXuceeaZDbZ/6623YuWVV44ddtghvv7661/oKwQAAABoeiabAOqzzz6Lo48+Onr16hVbbLFFnHbaaTH//PPHqaeeGu+9917suOOO0blz57jhhhtir732Ktvde++9dWHS7rvvHt26dSsh1owzzhi77bZbjBgx+rm3PvzwwxI8zTHHHHHWWWdFq1atfuGvFgAAAKDpmGwCqJdffjm++OKLUvWUmjdvXobi5f277rorZp555th3331jvvnmiz/84Q+x4YYbxi233FK2zWqoJZZYIrbffvtYaKGF4thjjy2VU4888sgoz5PD+nbeeedo3bp19O7dO9q0afOLf60AAAAATUmLmExkwJQGDhxYt6xTp07l+o033ohFF110lMd8+umn5frJJ5+M5ZZbrm55hkqLL754GbbXpUuXuuXffvtt9OjRI4YPHx6XX355tG/fvtKvCQAAAIDJKIDK4XCbbrpp7LHHHjH77LOXYXFZ1dS2bduYa665yqXmgw8+iNtuuy323HPPcj+H6HXo0KHB/maaaabS0Lwmh+MdfPDBcf/998dll11W1k+I5s2aTfDX2Ph4LRhXzfzsMBbeG4wL5xHGxnuDceVcwth4bzAunEem+AAqZQ+oVVZZJc4555w444wz4pJLLomzzz67NBav+fLLL0vwlBVTm2++eVmWQ/dG7uOU9+s3F7/qqqvi+++/jwUWWKD0fbrwwguj2QS8ab4fQ1+ppslrwbga4WeHsfDeYFw4jzA23huMK+cSxsZ7g3HhPDLF94CqyVnwVl999TJr3fLLL1+qluo3Ks/+Ta+++mqZJa/Wvyn7OY08k13er9/fKYOrPn36xHH
HHRcPP/xwXHnllb/gVwUAAADQdE02AdTQoUOjX79+dfdz6N2uu+4agwYNKj2bst9Tzlz30ksvxcUXX1yakdfMOuus8f777zfYX96fZZZZ6u5vvfXWsdhii5W+Ut27d48TTjihNCoHAAAAoIkEUI8++mhpEF5rLF7r9dSiRYtSyZS9obIZeQ7Ly5nu6sshevn4mhyS9+yzzzYYupf7qdl7771jhhlmiEMOOaT0hgIAAACgCQRQK6+8ckwzzTQlFBo2bFi88sorpUqpa9euceONN5Zhc9kjKmeuy6bjefnoo4/KYzfeeON47LHHonfv3qVC6qCDDipNy+vPgFdfPs8RRxwRAwYMMBQPAAAAoKkEUO3atSsBUlY5XXHFFXHiiSdGx44dS1B05513lgbi2f/pt7/9bd2lNgtehk3ZM+q6666LTTbZpART2bx8bE3Gs9n5euutV57HUDwAAACAJjILXg6ZyxApZ8DL6qVaBVM2D/8pGSjlZXRy2N7onHzyyT/ziAEAAACYYiqg6svgac4555zUhwEAAABAY6uAqhlT7yYAAAAApjyTZQUUAAAAAI2HAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKiUAAoAAACASgmgAAAAAKhUiwl50Ndffx2PPvpoPPLII/HOO+/EBx98EC1atIgZZ5wxFlxwwVhmmWViySWXjObN5VsAAAAATd14BVCvvPJKXHzxxXHTTTfFl19+OdZtZ5pppthggw1i0003jfnmm+/nHicAAAAAjTmA+uSTT+KEE06I6667LkaMGFEuNVNPPXW0a9eu3P74449LdVR6//33o2/fviWw2nzzzWOvvfaK6aabrqqvAwAAAIApOYBaZ511SqDUrFmzWGqppWKNNdYow+wWXnjhmHbaaRts+9FHH8Xzzz8fzz77bAwYMCAeeuihuOyyy+K2224rtwEAAABoWsYpgPr2229jp512ii222CJmn332sW47/fTTx29+85ty2X777WP48OFx7bXXxqWXXjqxjhkAAACAxhZA3X///dGqVasJeoIcdrfDDjvEtttuO0GPBwAAAKAJBFA/FT5l36cnnniiDNPLYXkLLLDAKNtMNdVUE36UAAAAADSNWfDuuuuuOOecc+If//hHXePxZ555pjQYf+utt+q2W3fddeP444+PFi3Ga/cAAAAANELNx3XDu+++O/bcc8947rnn6sKm77//Pnr27Blvvvlm3ex4ebn99tvj9NNPr/K4AQAAAGhsAdRpp51WwqUcXjf11FOXZf/5z3/i1VdfLbPj7bHHHvH444/HKaecEs2bNy9VUl999VWVxw4AAABAYwmgrrnmmnjppZfKkLpNN900HnvssbjxxhvjsssuK+tbtmxZZse78847Sz+oueeeu1yfe+65ZTsAAAAAmq5xatJ02GGHlSqnb7/9No499ti65bksLxk2HXLIIQ2Wp/POO6/c3nDDDas4dgAAAAAaSwXUU089VYbd5eVf//pXPP/883HMMceUIXnp/PPPL8vycsstt9QN0bvkkktKzygAAAAAmq5xqoBq1apV/Pa3v41///vfpZqpY8eO8eyzz5bqprnmmitWXnnlst1FF10UZ555Znz55Zcx00wzxTLLLFP18QMAAADQWJqQH3rooTHbbLPF559/Hk8//XSZAa9t27Zx6qmn1g25e+KJJ+Kzzz6LqaaaKo488sjSMwoAAAC
Apm2cE6JZZ501br311rj++uvLzHdzzjlnqYbKSqeaeeedN1ZbbbXYeeedY+mll67qmAEAAACYgoxXiVK7du1im222GeP6ffbZZ2IcEwAAAABNbQheNhx/7bXXJvhJ3n333TJUDwAAAICmZ5wqoHI2u8suuyx+85vfRNeuXWONNdZoMPRudN5666148MEH495774377rsvvvvuOxVSAAAAAE3QOAVQJ510Uhx33HHRv3//GDBgQGkwnrPfLbzwwtGhQ4cyNC8bkWcD8rfffjtefPHFGDJkSHnsiBEjSv+offfdt+qvBQAAAIApNYBab731SnPxyy+/PC699NIYOnRovP7663Uh08gydErzzTdf/OlPfyqXqaeeeuIeOQAAAACNqwl527ZtY8cdd4ztttsu/vvf/8YDDzwQjz76aOnvNGzYsLLNjDPOGPPMM08su+yyseKKK8avf/3rKo8dAAAAgMY2C155QIsWpRdUXgAAAABgosyCBwAAAAATSgAFAAAAQKUEUAAAAABUSgAFAAAAQKUEUAAAAABUSgAFAAAAQKVa/JwHDxkyJOaee+5y+5lnnomrrroqWrZsGdtss03MO++8E+sYAQAAAGhqAdTXX38dPXr0iBdeeCH69etXgqitttoqvvrqq7L+1ltvjWuvvbYunAIAAACg6ZqgIXhnn3123HPPPfH222/H559/HldccUV8+eWXMWLEiHL5+OOP49xzz534RwsAAABA0wig/v3vf0ezZs2ia9eu0aJFi/jPf/5T7p9xxhmx//77lxDqoYcemvhHCwAAAEDTCKDefPPNcr3bbruVyqdBgwbFVFNNFauuumqsvvrqZd177703cY8UAAAAgKYTQH333Xfleuqpp45HH320VDwtuuii0apVq9IfKk0zzTQT90gBAAAAaDpNyDt06FD6P2UfqP/+979l+N1vfvOb+Pbbb+PCCy8s28w555wT+1gBAAAAaCoVUCussEKpejr22GNLP6iU/aCuueaauOGGG0ogtc4660zsYwUAAACgqQRQe+21V8w+++x1s95ttNFGsfjii8d8881X1i+55JKx1VZbTexjBQAAAKCpDMGbddZZ45Zbbon+/fvH9NNPH507dy7LF1xwwejRo0d0795dDygAAAAAJjyASu3atYu11lqrwbJZZpkldt111wndJQAAAACN0AQNwau56667Yo899ojVVlstll566bLs+OOPj/fee29iHR8AAAAATbECKvs+7b///nHrrbfW3c/G4x9//HGZBS+DqSuvvDJmmmmmiX28AAAAADSFCqjLLrus9IDK4GmeeeapW/7qq6+W6zfeeCPOP//8iXeUAAAAADStAOraa68tFU+77757/OMf/6hb3qlTp1IZlcHUPffcMzGPEwAAAICmFEC98sor5fqPf/zjKOvWXHPNcj106NCfe2wAAAAANNUAqlWrVuX67bffHmXdyy+/XK7btm37c48NAAAAgKYaQC2zzDJlmN2RRx4Z//rXv+qW33bbbdGrV68yPG+ppZaamMcJAAAAQFMKoLL3U4sWLeL111+Pv/3tbyVwSvvtt1+8+eab0bx58/jLX/4ysY8VAAAAgKYSQGV10+mnnx4zzDBDqYSqf5l55pnjhBNOiOWWW27iHy0AAAAAU5wWE/rANdZYI1ZaaaV48MEH49VXX42pppoq5p133lhhhRVi6qmnnrhHCQAAAEDTC6BSBk0ZRAEAAADAzwqgzjrrrJgQe+yxxwQ9DgAAAIAmGEDVGo2PDwEUAAAAAOM8BC8bjI+PCQmsAAAAAGiiAdQ//vGP6o8EAAAAgKYbQHXu3Ln6IwEAAACgUfpZs+B98cUX8fHHH8d3331Xtyxvv/fee9GvX7/Yb7/9JsYxAgAAANDUAqhPPvkk9tprr3j44YfH2htKAAUAAADABAVQZ599dgwYMGCs2zRv3nxCjwkAAACARmSCUqJ77rmnzHK32GKLxVprrVVub7HFFrHllltGmzZ
tyv2+fftO/KMFAAAAoGkEUO+++2657tWrV+y///5lGF42Kj/88MPjoIMOKvcvv/zyiX2sAAAAAEyBftY4uemmmy7mmmuumHHGGeOpp54qy7p06VKua/fH1zvvvBM9e/YsFVSff/75zzk8AAAAAKbUAKpDhw7l+sYbbyzXSyyxRNx5553x/vvvl+v00Ucfjfd+r7322lhzzTVLf6nzzz8/unbtGm+++WaDbV577bXo1KnTKI9df/31Y+GFF25wefHFF8u61VdfPa6//voG2z/77LPx61//uq5iCwAAAIDJqAl5BjoXXnhhaUa+zjrrlKqn+++/P373u9+V9dkDau655x6vfX722Wdx9NFHl2F9Q4YMieWWWy7OO++8OPXUU+Okk04q27z99tux8847x1dffdXgsd999128+uqrcemll8Z8881Xt3yGGWYY7XO9/vrrseOOO8Zvf/vb8px5vAAAAABMRhVQu+66ayy55JLRokWLEvhsuOGG0b59+1JJVLtss80247XPl19+Ob744otS9VQOrHnzMhSvdv+uu+6Kbt26RatWrUZ57BtvvBHffPNNqYyaZZZZ6i55fCPLKq0ddtghFllkkRJsTTXVVBPyEgAAAABQZQVUhk1XXXVVDBw4sAQ4M800U1x22WXRp0+fUsmUM+P98Y9/HK99zjzzzOU691lTf6jdvffeGz169Ij5559/lHBr0KBBMfvss0fr1q3H+hyffvppqXzK4z3rrLNGG2YBAAAAMBkEULUKpVrD8bTQQgvFcccdN8EHMsccc8Smm24ae+yxRwmTMhzK3lJt27Yt63OoXHr44YdHWz3VsmXLMjzv6aefLiFVzs5XP8DKCqnc9/PPP1+qqdq0aTNBx9nccL16vBaMq2Z+dhgL7w3GhfMIY+O9wbhyLmFsvDcYF84jv1gAlcPr6vdM+v7770v/pwyBcla81VZbrVQYTYgMmVZZZZU455xz4owzzohLLrmk9Jlaaqmlxvq4V155JYYPH14CrL322iuuvvrq+POf/xy33357CbNS7q9du3Z11U/HHnvsBB3j9xqW1+O1YFyN8LPDWHhvMC6cRxgb7w3GlXMJY+O9wbhwHqk8gMqAKQOiHCL3wAMPxPTTTx9Dhw6NnXbaqW62uZSVS0ceeWRstNFGE3RAOQteVintueeeccstt8TBBx8ct91221gfk43Lv/zyyxIwpXz+xx57LG666abYZZddfvhCW7SIvn37lgqpDKmyefrKK688QccIAAAAwERuQv7uu+/GVlttFQMGDCgzztUcdNBB8cILLzRoPp4z1B166KHxzDPPjMdhRAmz+vXrV3c/h95ls/Ps75TVTWOT4VItfEpZodWxY8eyz5ocfjfnnHPG2muvHb///e/jsMMOi08++WS8jhEAAACAigKoCy64ID766KNye8EFFyxVTlml1L9//xL2ZPPvo446qlQi5TC8HJZ36aWXjteBPProo6XJeDYKr/nggw9KuDTNNNOM9bHdu3cvw+pq8vkzGMsQqqb+bHeHH354eZ4JHYYHAAAAwEQOoB588MESNG2xxRZlWFwGQv/+97/r1m+44Yax2WablR5MPXv2LJVQ9WezGxc5HC73e8ghh8SwYcNKX6cTTjghunbtWhqMj83qq68eF110Udx9990xePDgEoZlddOYhgHOOuus5Tivu+660r8KAAAAgEncA+qtt94q15tvvnndsoceeqjudjYer1l22WXrqpfGRw6h6927dwmP7rzzzjIEb9VVV40jjjjiJx+77bbblqF/2aPq/fffL03LL7zwwgbD8kZWC9NyKN6tt94a00477XgdLwAAAAATMYCq9X1q06ZNuc7ha08++WTd0Lbll1++bttvvvmmwWPGRwZHWZWUM9Z16dKlXEaWy3J4XX1ZnZXNxmsNx0dWv7dU/cdcccUV432MAAAAAFQwBG/mmWcu17XG4v/85z/j22+/LSFOp06dSrXSyGFPhw4dYkJlyJQ
NwwEAAABoIhVQyy23XNx8882lafdTTz0V119/fd26P/zhD+X67bffLtVL5513Xgmm6ldFja/RVT4BAAAA0IgroLbffvsy1C77K1188cWlwXfKKqVNNtmk3D7wwAPj7LPPLpVRue2f//znao8cAAAAgMYTQC2yyCJx6qmnxowzzlhmuMtLLrvggguidevWZZtZZpmlLM/72Qw81wMAAADAOA3BS2uttVaZlW7QoEHRqlWrWHDBBRusX2mllaJjx46x0UYbxeyzz17FsQIAAADQmAOolMHTYostNtp1GTwBAAAAwAQNwQMAAACACSWAAgAAAKBSAigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgAAAKBSLcZlo2222Wa8d9ysWbO4+OKLJ+SYAAAAAGhEximAeuSRR0qgNK5GjBgxXtsDAAAA0MQDqFqoBAAAAACVBFDPP//8eO8YAAAAAJIm5AAAAABMHkPwRvbJJ5/EY489FsOHD4/vv/++bvm3334b77//fvTr1y+uvvrqiXWcAAAAADSlAGrw4MGx9dZbx4cffjjxjwgAAACARmWCAqizzz47hg0bNtZt5p133gk9JgAAAACaeg+ogQMHRrNmzWLDDTeMvffeu9w+6qij4rTTTot55pmn3D/ssMMm/tECAAAA0DQCqNrQu5122im22GKLGDFiRAmdunbtGocccki5f/7550/sYwUAAACgqQRQbdq0qbs93XTTxVxzzVUakqesgEqDBg2aWMcIAAAAQFMLoDJwSieffHJ8+umn0alTp/j3v/8dt99+e5x++ull3TfffDNxjxQAAACAphNAZe+nHGbXr1+/GD58ePz2t78tQVTPnj3jzjvvLMPxFllkkYl/tAAAAAA0jQBqm222ie22264Mv5tzzjljvfXWi0UXXbSEUnlp3bp1aU4OAAAAAC0m9IEHHHBA7L777uV2q1at4vLLL49//vOf8dlnn8XKK68c884778Q8TgAAAACaUgA1cODAcr300ks3aEzerVu3GDZsWFn/yiuvxKqrrjrxjhQAAACAphNAde/ePZo3bx733HNPzDrrrA3WZQDVo0eP0qhcAAUAAADAOAVQgwcPLjPc1Ze9ni666KJo27Ztg+UvvfRSuX7vvfcm5nECAAAA0JgDqKxmuuGGG+Ktt94q93OWu5QB1Ojk+mxODgAAAADjNAteNhnfZ5996ma5q6ndH/nSsmXL2H777as8bgAAAAAaWw+o9dZbr/R7+v777+PPf/5zqXI67bTTYsYZZ6zbJpe1bt06OnbsGO3atavqmAEAAABorE3Il19++XK9++67l7Dpd7/73Sg9oAAAAADgZ8+Ct+eee5brHG7Xv3//eOGFF0ogtcgii8RvfvObCdklAAAAAI3UBAVQtZnxevToEYMGDWqw/Fe/+lWcfvrpMd98802M4wMAAACgKTQhH9kHH3xQ+kBl+DRyA/Kshsp1w4YNm/hHCwAAAEDTCKB69+4d7733Xkw11VSx9dZbxznnnBNnn312bLXVVtGiRYt49913yzYAAAAAMEFD8Pr161d6Pu21116x00471S1fY401ykx5p5xyStx1111x4IEHTsxjBQAAAKCpVEC98847dYHTyGrLhg4d+nOPDQAAAICmGkC1b9++XL/xxhujrBsyZEi5nm666X7usQEAAADQVAOoTp06lYbjvXr1iqeffrpued4+5phjyvC8pZdeemIeJwAAAABNqQfUNttsE/fcc0+8+eabsemmm0a7du3K8k8//bQEUxlAZUNyAAAAAJigCqgVVlgh9t5773I7A6dPPvmkXPJ22nnnncs2AAAAADBOFVBnnXVWud5uu+2ibdu25fYuu+wSXbp0icsvvzyef/75aNGiRXTs2DE22WQT4RMAAAAA4x9A5bC6HG5XC6DSMsssUy4AAAAAMFGH4AEAAADAuBJAAQAAADD5zIL35JNPxgwzzDDO2y+//PITckwAAAAANNUAqkePHuO8bfa
MevbZZyfkmAAAAABoqgHUiBEjqjsSAAAAABql8Qqgtt1222jXrl11RwMAAABA0w6gtttuu5h11lmrOxoAAAAAGh2z4AEAAABQKQEUAAAAAJN+CN6GG25YZrWbZpppqj0aAAAAAJpmAHXcccdVfyQAAAAANEqG4AEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAJUSQAEAAABQKQEUAAAAAE0rgHrnnXeiZ8+e0bdv3/j8888n9eEAAAAA0JgCqGuvvTbWXHPNGDBgQJx//vnRtWvXePPNNxts89prr0WnTp1GeWz//v1jvfXWi6WWWiq22WabGDJkSN267t27x5lnntlg+7feeitWXnnl2GGHHeLrr7+u8KsCAAAAaNommwDqs88+i6OPPjp69eoVW2yxRZx22mkx//zzx6mnnlq3zdtvvx0777xzfPXVV6OESbvvvnt069athFgzzjhj7LbbbjFixIjRPteHH35Ygqc55pgjzjrrrGjVqlXlXx8AAABAUzXZBFAvv/xyfPHFF6XqKTVv3rwMxavdv+uuu0rANLqw6Jprroklllgitt9++1hooYXi2GOPLZVTjzzyyCjb5rC+DLFat24dvXv3jjZt2vwCXx0AAABA09UiJhMzzzxzuR44cGDdsvpD7e69997o0aNHqYrKIXb1Pfnkk7HccsvV3c9QafHFF48nnngiunTpUrf822+/LfsYPnx4XH755dG+ffuKvyoAAAAAJpsAKofDbbrpprHHHnvE7LPPXiqdsqqpbdu2ZX0Oz0sPP/zwKI997733okOHDg2WzTTTTKWheU0Oxzv44IPj/vvvj8suu6ysnxDNmzWboMc1Tl4LxlUzPzuMhfcG48J5hLHx3mBcOZcwNt4bjAvnkSk+gKqFTKusskqcc845ccYZZ8Qll1wSZ599dmksPjY5dG/koXl5v35z8auuuiq+//77WGCBBUrfpwsvvDCaTcCb5vsx9JVqmrwWjKsRfnYYC+8NxoXzCGPjvcG4ci5hbLw3GBfOI1N8D6ianAVv9dVXL7PWLb/88qVq6adkP6eRZ7LL+/X7O3355ZfRp0+fOO6440oV1ZVXXlnJ8QMAAAAwmQZQQ4cOjX79+tXdz6F3u+66awwaNKj0bBqbWWedNd5///0Gy/L+LLPMUnd/6623jsUWW6z0lerevXuccMIJpVE5AAAAAE0kgHr00UdLg/BPP/20btkHH3wQLVq0iGmmmWasj80hevn4+kPynn322QZD93I/NXvvvXfMMMMMccghh5TeUAAAAAA0gQBq5ZVXLkFThkLDhg2LV155pVQpde3aNVq2bDnWx2688cbx2GOPRe/eveOll16Kgw46KOaaa64GM+DVl89zxBFHxIABAwzFAwAAAGgqAVS7du1KgPTGG2/EFVdcESeeeGJ07NixBEU/JcOm7Bl13XXXxSabbBIfffRRaV4+tibj2ex8vfXWK89jKB4AAABAE5kFL4fMZYiUM+Bl9dLoKphy2QsvvDDaQCkvo5Oz6Y3OySefPBGOGgAAAIApogJq5JBpzjnnnNSHAQAAAEBjq4CqGVPvJgAAAACmPJNlBRQAAAAAjYcACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgA
AAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKTXEB1DvvvBM9e/aMvn37xueffz6pDwcAAACAxhRAXXvttbHmmmvGgAED4vzzz4+uXbvGm2++WdYdffTRsfDCCze4XHrppWXdgQceWC71ffzxx/HHP/4xNtxww3IbAAAAgGq0iCnEZ599VkKmXr16xZAhQ2K55ZaL8847L0499dQ46aST4uWXXy6VURtttFHdY9q1azfafX311Vex6667xtdffx2XX355tG/f/hf8SgAAAACalimmAioDpi+++KJUPaXmzZuXwKl2P9cvtthiMcsss9Rd2rRpM8p+vvvuu9hnn31K5dSFF14YM8000y/+tQAAAAA0JVNMBdTMM89crgcOHFi3rFOnTuX6008/jaFDh8Z88833k/s54ogj4oknnojLLrss5phjjgqPGAAAAIApKoDKsGjTTTeNPfbYI2afffZo1apVLLHEEtG2bdtS/dSsWbMyJO/++++P6aefPrbbbrsGw/HSaaedFtdcc00cf/zxMf/880/QcTRv1mwifUWNgdeCcdXMzw5j4b3BuHAeYWy8NxhXziWMjfcG48J5pNEHUCl7QK2yyipxzjnnxBlnnBGXXHJJnH322TF48OASQHXs2DG23nrrUiV12GGHlR5Q2bQ8/ec//yl9pDK06t27d6y77rolxBpf348YUcFXNqXyWjCuRvjZYSy8NxgXziOMjfcG48q5hLHx3mBcOI80iQAqZaD0/PPPx5577hm33HJLHHzwwXHrrbfGaqutViqf0iKLLBKvvvpqXHHFFXUB1PDhw0tYteCCC8Z6661XAqz99ttvEn81AAAAAI3fFNOEPHs89evXr+5+Dr3LmewGDRoUH3/8cV34VJPVUPmYmmxWntVTc845Z+y9997Rt2/feOqpp37RrwEAAACgKZpiAqhHH300evToURqO13zwwQfRokWLuOCCC2LbbbdtsH1WSWUIVZPb1XTv3j0WX3zxOOigg+Lrr7/+hb4CAAAAgKZpigmgVl555ZhmmmnikEMOiWHDhsUrr7wSJ5xwQqlsymF22fepT58+8frrr8fll18eN954Y2y//faj3Vfz5s2jV69eZZheDsUDAAAAoDpTTACVDcWzefgbb7xRejudeOK
JpcLpiCOOiE6dOsXpp58eN910U+nvlM3JTz755FhmmWXGuL/sE7XDDjsYigcAAABQsSmqCflSSy0V1113Xala6tKlS7nU/P73vy+X0TnuuONGu3zfffctFwAAAACqM8VUQNWXwVM2EwcAAABg8jdFVUDV1K98AgAAAGDyNkVWQAEAAAAw5RBAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFApARQAAAAAlRJAAQAAAFCpKS6Aeuedd6Jnz57Rt2/f+Pzzzyf14QAAAADQmAKoa6+9NtZcc80YMGBAnH/++dG1a9d48803y7ohQ4bEtttuG0svvXSsu+668cADD9Q97swzz4zu3bs32NfXX39dtl911VXjrbfe+sW/FgAAAICmYooJoD777LM4+uijo1evXrHFFlvEaaedFvPPP3+ceuqpMWLEiNh9991j5plnjuuuuy422GCD2GOPPcYYLH3//fex//77x4svvhgXXnhhzDHHHL/41wMAAADQVEwxAdTLL78cX3zxRal6Ss2bNy9D8fL+Qw89VCqgjjrqqFhggQVi5513LpVQGUaNzjHHHFMqpPr06VNCLAAAAACq0yKmEFndlAYOHFi3rFOnTuX6vPPOi8UWWyymmWaaunXLLrtsPPHEE6Ps55xzzinBVIZPiy666C9y7I3ZoOHDY/HLL5/Uh8Fk/h5ZYLbZJvVhMJn7bMibcc+mW0/qw2Ayfn/EggtO6sNgMvf5W0Oi/84bTerDYDJ/j8RCziWM3XtD3opem+w2qQ+Dyfj9Mf2CC03qw5hiTTEBVA6T23TTTcvQutlnnz1atWoVSyyxRLRt2zbee++96NChQ4PtZ5ppptKwvL5rrrkmTj/99Nhrr71KQDUhWrdq+bO+jsbk2WefndSHADQCziXAz+U8AkwMziVQrSlmCF7KHlAnnXRStGnTJs4444xYe+2148knnyxD8zKQqi/vZ6Pxmpdeein++te/luDpsssui2HDhk2CrwAAAACg6ZmiAqiUs+CtvvrqZWa75ZdfPg4++OBo3bp1g7Ap5f2pp5667v6HH34YRxxxRPTu3TtatGhRmpkDAAAAUL0pJoAaOnRo9OvXr+5+Dr3
bddddY9CgQWX43fvvv99g+7xff1jer3/96zKEr127dnH44YfH7bffHv/6179+0a8BAAAAoCmaYgKoRx99NHr06BGffvpp3bIPPvigVDNlL6hnnnkmvvzyywbbL7XUUnX3c7ua3//+92X43pFHHmkoHgAAAEDFppgAauWVVy6z3B1yyCElNHrllVfihBNOiK5du8aKK65YGpMfdNBBpddTDrN76qmnYpNNNhnj/g499NAyTM9QPAAAAIBqTTEBVA6dy2DpjTfeiCuuuCJOPPHE6NixY+nrNNVUU8U555xTZsPr1q1b3HzzzXH22WeXmfPGJIfn/d///Z+heAAAAAAVazZixIgRMYXJGfC6dOlSLgAAAABM3qaYCqj6Mniac845J/VhAAAAANBYK6AAAAAAmHJMkRVQMDEtvPDC8fDDD1ey7+eeey4ee+yxSvYNTB7nj7y89dZbo6zLfoW57swzz/zZz5P9D3NfeZ2GDBkS991338/eLzD5WX311evOLXlZfPHFy6Q7F110UVnfvXv3iXJeARq3Aw88sMG5ZOTLmD7/XH/99eU8NLb95mVc5Az2N9544wR/DTQ+Aiio0O677x6vvvrqpD4MoEItW7aMfv36jbL8rrvuimbNmk2U58iZXh944IFynQ4++OAy2yvQOOXPeP7M5yXPJTvvvHOZ/dkHOWBc5ezxtfNInlNmm222uvt5WWaZZSo/hgzOr7vuusqfhylHi0l9AAAwJVtuueVKALX11ls3+I/f448/HosttthEeY6c7XWWWWaZKPsCJn/TTjttg5/5jTbaKG699VYzNwPjdR7JS+32pPhbQrcfRqYCCn7ipHneeeeVMtQlllgifvvb38ZZZ51Vtz7L4PP+FltsEUsttVRsueWW8fLLL9ete/PNN+Oggw6qK1PNdTvssEP8+te/jt/97nflsd9//31Zl+X0++yzT9k+97X22mvH3XffPYm+cmBcrbHGGvHII4+U0Knm3nvvLcFU27ZtRylrX2eddaJTp07RrVu3GDhwYN26PM9cdtllsdlmm8WSSy4ZG2ywQTz99NOjDMHL80k+X54/8jyT3nnnnejRo0d07ty5TNRx9NFHx9dff133nH/6059KReayyy4bN9988y/0ygATU4sWLUrFZRo6dGj85S9/KeeK/Huhf//+ddsNHz48DjvssFhxxRXLz/z//d//lWUph9zkuebyyy8vf4csvfTSZX3tfJH+/e9/x7rrrlv+Ftlkk03K+QZoXB599NG6zy95Hthxxx3j3XffbbDNKaecUveZ5ZJLLhnjvsZ0zsi/P/Jvlbyff8NAEkDBWGSp+8UXXxzHHHNM3HHHHeUDXAZFzzzzTN02559/fvnjL0+ys846a+y0007lD7ncLktds+Q1S2CHDRtWAqoOHTrENddcE0cccURceuml8Y9//KPBCTxDr9zXxhtvHHvttVcMGjRoEn31wLj41a9+VX7277///gY/y7///e8bbJc/17169SpDafLckh8O83yRHyRr8ryRyzIkyv9WZpA0sjyfZNn89ttvX7bP882f//zn+OKLL8ofiKeddloJwHK4Tk1WYy244IJx9dVXlyAdmHJ88803pfLpwQcfLIF3ynNIfuC77bbbyj/I9t9//7pKgz322KP0oMx/oF144YXln1/1+7Xkh8w777wzLrjggnIOyX3XhvY9//zzccABB8Suu+5azkPrr79++WD62muvTaKvHpjYPvnkk/K3yEorrVQqK/v06ROvv/569O7du26b/Cf6Cy+8EFdddVXsu+++cfzxx4+2Z9TYzhl5jsq/VfJvlhzyB0kABWOR/VaOPfbYWGGFFWKuueYq/ynI0tWXXnqpbpuVV145tt1221hggQXKh8sMmvKPxOmnn76UutbKX/ME36ZNm7JNbpsfTrNiIf8ArJluuuniqKOOKuvzQ2iesI2bhslffiis9YHKQKj+B8WaDIeyYmnDDTeMjh07xn777VfCqwyi6w+
zyXPD/PPPH9ttt11dBVR9eT7JKohpppmmnGf+85//lBDrxBNPLP9hzPPV4YcfXpqgf/bZZ+Ux2Ysq/zjMc8uMM85Y+esB/Dz5T6r8GyAvWTGZH/AyaM4Pdyn/8ZVVlPPMM0/5sPfee+/FBx98UD4MZrVBng/ycXnJ23l+Gjx4cF2gdeihh5bzRVY25OV///tfWZcfRLMK849//GPMO++8sc0225S/c/J8AjQOX375Zey2227lH+tzzz13qZRca621Gny+ad26dRx33HGx0EILlb9N8pxw5ZVXjrKvsZ0zpp566vK3Sv7Noo0ANXpAwVj85je/iSeffDJOPvnk8h/E/I9i/pFXGzaXsjS1pl27duWDY2672mqrNdhXLsuZbLKEvib/sMz9ffzxx+V+/hezVatWdevzfm1IHzD5yrApKxa//fbbGDBgQAmWZppppgbb5M9y/rFXX5a91/8Zn2+++RqcT/KD4k/Jx+fjMsCuf17KY8n/aKY8lvxDEJgy5PkkPxDWPgjmh7f8p1ZNfmisf65IX331VQmZ2rdvX/4WqcngOc8Pua7WDyY/KNZ/fJ4vaueTf/7zn6XqoSbPQyonofHI80n+MywbhOdnmxxtkdVO9T/T5DlmhhlmqLufPS1zBMfInDMYXwIoGIs80f7tb3+LTTfdtPwhmP+BzGS/vvqBUvruu++iefNRiwvzD8iR1YKsfMz47AuYvOR/D2s9FXLGqjXXXHOczgH5M14/0K71dxkfY9pv/evRbQNMvjI0rh8Sjax+GFWTQ/Dq/xOrvjwX1M4HaeTtasP3cpusqMoPp/UJsKHxyKrpbPWR/xjPdgBZwZRD9/Of7jUjf/7Iv1VG9zeKcwbjyydbGIssH82KhezjlCfW/E9AlrjXn9Ehy93rj6nOioPRNdrL/0Zm76j6FQ3ZlyWHw+QwmpT/faj/YTSH32jaB5O/DI9XWWWVMszlnnvuGaX/U+0cUP+Pu5T361cqTIh8/KuvvhofffRR3bInnniiHFMOzwGajjwfZFV1bbhdyuqGnCRhXM41uU1OdpDhV+2SlQ31e9wBU7bsU5lVkdnHNof25qQpQ4YMafD5Ju9nb8map556qrQPGN9zRrYAgPoEUPDjSTVPlPUvedLNwCmH07zyyislDMpZ6jJAqj9bzC233FKad2YJajYHnmOOOcosVCnHPecfgfnBMMdG5+OyN0tum1US2fwz+0rVTs55ss9eDfmYc889twRWOZsEMGUMw8uqyaxcqD88piZ7xWW/pzxf5DnlpJNOKgH2hPyM57klQ6cMxLOJaD5fNiHOEPuhhx4qvebWW2+9MhQHaDpyuF32X8mK7fzbJi95e/nlly9Dg39Knqduv/32MkFK/kMth+jkpf7wYGDKlv/4fuutt8pnnPzskc3HczKC+p9vckhvnjuyL1T2fsqJCzKsGt9zRva/zYkPMqSCZAgeRJQPgiPLE3FWPuUlp0PPD5U5fXqeSHO8dE2tKV82DM3/IPz973+vG0qX4VLuOz8o5jSk2XA8Z9TLaqqsfMoTec5CUZPTl2YT81yfJ+78hTC6D7LA5Cf7HWQfldFVP6WcDeb999+PM844o/R+W3TRRaNv377lA+P4ymHBeW7KadhvuOGGOOecc0rolGX0bdu2LeelnLUGaHpytqqcQTM/GOZQvQzHDzrooHF6bPalyxk08x9keZ1VlNkHMwMsoHHIzzMDBw4svebyn+BLLrlkCZtqM+um/BslZ/jNvyvyH/LZkiR7047vOSNbEuTnpD/84Q+lSnzk/pg0Pc1G1K+1A8ZLzmjVuXPn2HPPPX/2vvLEnTPX5ExZAAAA0JgYggcAAABApQRQAAAAAFTKEDwAAAAAKqUCCgAAAIBKCaAAAAAAqJQACgAAAIBKCaAAAAAAqJQACgAAAIBKtah29wAAjcOBBx4YN9xwQ4NlzZs3j5YtW8YMM8wQv/nNb2KPPfaIueee+xc9rm233TYGDBhQbr/wwgu/2PN
+8MEH8f3338css8zyiz0nADDlUgEFADCBMoD56quv4p133okbb7wxttxyy3j77bejMfviiy/inHPOid///vcxePDgSX04AMAUQgAFADCerrrqqrjvvvvirrvuiksuuSSWWWaZsvzdd98t4Uxj1qdPnzj99NPj888/n9SHAgBMQQRQAADjaeaZZ47ZZputDLfr3LlznHHGGTHVVFOVdQ8++GA0ZiNGjJjUhwAATIEEUAAAP1OHDh1KH6j03nvvlevVV189Fl544dhrr73iggsuiC5dusTSSy8dp512Wt3j/ve//0XPnj3jd7/7XSyxxBKxyiqrxCGHHBJDhgwZ5Tk++eSTOProo8u2nTp1is022ywefvjh0R7P9ddfX547L3m7vtpx5XV9OZTw3HPPjT/+8Y9l/3m8f/rTn+Kmm26q26Z79+5x1lln1d3fZpttyr4AAH6KJuQAAD/T0KFD48MPPyy3Z5pppgbrskH4nXfeWXd/qaWWKtfZMyrDpm+//bZuXfaSuvbaa+OOO+6I888/P5Zbbrm6cCjDn+eee65u2yeffDK23377mG666X728X/99delmfljjz1Wtyyf8/HHHy+XV155Jfbee++f/TwAQNOlAgoAYDy9//77JSzKSqX+/fvHbrvtFt99911ZN3Jl0ccffxxrr712/POf/4xTTz21VDDl4w4//PASPmXl1Iknnhi33357HHHEETHNNNPEp59+GnvuuWe5TldeeWVd+JSVUpdddlmpbFphhRXKbHQ/14UXXlgXPuWxZjh29dVXx+KLL16W9e7duxxz9n7KoKomq7myFxYAwE9RAQUAMJ4233zz0S6fZ555Yo899hhl+f777x9zzTVXdOzYsdzPKqesMEoZRK277rrl9gILLBBffvllHH/88TFs2LC47bbbynPVD3kyxMrnSSeffHKsuuqqP7sheD5Pmnbaactzt2nTptw/5phjSgVXDrPLoKxdu3blUjPjjDOWXlgAAD9FAAUAMIGy8fjUU08ds846a/z2t7+NXXfdtYQy9WVFU4ZP9b3wwgt1t1dcccUG6+rfr2331ltvlevpp5++LnxKOfxu/vnnj2eeeeZnNRF/7bXXyvW8885bFz6lRRddtFwAAH4uARQAwHi6++67RwmVxqR+xVBNbca8n9KsWbMG16MLj5o3H3tHhdrQwJpa5VV9tf3W70cFADAx6QEFAFChli1bjrIsh9rV5BC3+rKnVM0iiyxSrmtVT8OHD6+rVqrdf/nll0fZf6tWrepuf/bZZ3W3c6jeRx99NMr2WfmUXn/99TLbXv1Z+nK2vQMPPLCuR1QtDBtTIAYAMDoCKACAX9gGG2wQLVr8UIh+1FFHxa233lqCpGw2fuaZZ5blM888c6yzzjrl9lprrVX32JyNbuDAgfH000/HPvvsM9r+TzkksCablWfolM3Qe/XqNUpFVKr1oMp9/d///V8Z0pfhU26fs+3dcMMNdUML64dbuV2uBwD4KYbgAQD8wrIC6tBDDy0BTzYb79mz5yjD9nLGudrwvQ033DBuueWWUi317LPPxtZbb103/C6rpJ5//vkGj19qqaXKEME33nij9JHq0qVLWd62bdtYcMEFY9CgQQ2233777ePee++NJ554Iu65555yqS97W80333wNqrLSCSecUK7zuEbufQUAUJ8KKACASWCLLbaIq666KtZbb73o0KFDGaqXlUsbb7xx3HjjjbHccss16Bl13nnnxY477li2zSqkZZZZJvr06TNKE/OU6/v27RurrbZaCZ1ydrusorrmmmsaDP+rad26dVx88cWx1157lfX5+Gx4/utf/zpOOeWUUnVVs9JKK0X37t3LsebjfvWrX5WZ+wAAxqbZCIP3AQAAAKiQCigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgAAAKBSAigAAAAAKiWAAgA
AACCq9P8AGtheyQCQiU4AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Highly customized plot\n", + "fig, ax = plt.subplots(figsize=(12, 8))\n", + "\n", + "# Create the plot\n", + "product_sales = df_sales.groupby('Product')['Sales'].sum()\n", + "bars = product_sales.plot(kind='bar', ax=ax, \n", + " color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4'],\n", + " edgecolor='black', linewidth=1.2)\n", + "\n", + "# Customize the plot\n", + "ax.set_title('Total Sales by Product', fontsize=18, fontweight='bold', pad=20)\n", + "ax.set_xlabel('Product', fontsize=14, fontweight='bold')\n", + "ax.set_ylabel('Total Sales ($)', fontsize=14, fontweight='bold')\n", + "\n", + "# Add value labels on bars\n", + "for i, bar in enumerate(ax.patches):\n", + " height = bar.get_height()\n", + " ax.text(bar.get_x() + bar.get_width()/2., height + 1000,\n", + " f'${height:,.0f}', ha='center', va='bottom', fontweight='bold')\n", + "\n", + "# Styling\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "ax.grid(True, alpha=0.3, axis='y')\n", + "ax.set_facecolor('#F8F9FA')\n", + "plt.xticks(rotation=0)\n", + "\n", + "# Format y-axis to show values in thousands\n", + "ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'${x/1000:.0f}K'))\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply your visualization skills to real scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Create a comprehensive sales dashboard\n", + "# Include: time series, comparison charts, distribution, and proportions\n", + "# Make it publication-ready with proper titles, labels, and styling\n", + "\n", + "# Your code here:\n", + "def create_sales_dashboard(df):\n", + " \"\"\"Create a comprehensive sales dashboard\"\"\"\n", + " # Your implementation 
here\n", + " pass\n", + "\n", + "# create_sales_dashboard(df_sales)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Stock performance analysis\n", + "# Create visualizations to compare stock performance:\n", + "# - Price movements over time\n", + "# - Daily returns distribution\n", + "# - Correlation analysis\n", + "# - Risk vs return scatter plot\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Advanced time series visualization\n", + "# Create a plot that shows:\n", + "# - Original time series\n", + "# - Trend line\n", + "# - Seasonal decomposition\n", + "# - Confidence intervals\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Pandas Integration**: Direct plotting from DataFrames is convenient and powerful\n", + "2. **Plot Types**: Choose the right visualization for your data type and message\n", + "3. **Customization**: Always add titles, labels, and legends for clarity\n", + "4. **Color and Style**: Use consistent, accessible color schemes\n", + "5. **Subplots**: Combine multiple views for comprehensive analysis\n", + "6. **Data Preparation**: Clean and prepare data before visualizing\n", + "7. 
**Interactivity**: Consider interactive plots for exploration\n", + "\n", + "## Plot Type Quick Reference\n", + "\n", + "| Data Type | Best Plot Type | Pandas Method |\n", + "|-----------|---------------|---------------|\n", + "| Time Series | Line Plot | `.plot()` or `.plot(kind='line')` |\n", + "| Categories | Bar Chart | `.plot(kind='bar')` |\n", + "| Distribution | Histogram | `.plot(kind='hist')` |\n", + "| Relationships | Scatter Plot | `.plot(kind='scatter')` |\n", + "| Proportions | Pie Chart | `.plot(kind='pie')` |\n", + "| Comparisons | Box Plot | `.boxplot()` |\n", + "| Cumulative | Area Plot | `.plot(kind='area')` |\n", + "\n", + "## Best Practices\n", + "\n", + "1. **Start Simple**: Begin with basic plots, then add complexity\n", + "2. **Tell a Story**: Each plot should convey a clear message\n", + "3. **Consider Your Audience**: Adjust complexity and detail accordingly\n", + "4. **Use Consistent Styling**: Maintain visual consistency across plots\n", + "5. **Test Different Views**: Try multiple plot types for the same data\n", + "6. **Add Context**: Include benchmarks, targets, or reference lines\n", + "7. 
**Make it Accessible**: Use colorblind-friendly palettes" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/PandasDataFrame-exmples/13_advanced_data_cleaning.ipynb b/Session_01/PandasDataFrame-exmples/13_advanced_data_cleaning.ipynb new file mode 100755 index 0000000..d2636ce --- /dev/null +++ b/Session_01/PandasDataFrame-exmples/13_advanced_data_cleaning.ipynb @@ -0,0 +1,815 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Session 1 - DataFrames - Lesson 13: Advanced Data Cleaning\n", + "\n", + "## Learning Objectives\n", + "- Master advanced techniques for data cleaning and validation\n", + "- Learn to detect and handle various types of data quality issues\n", + "- Understand data standardization and normalization techniques\n", + "- Practice with real-world messy data scenarios\n", + "- Develop automated data cleaning pipelines\n", + "\n", + "## Prerequisites\n", + "- Completed previous lessons on DataFrames\n", + "- Understanding of basic data cleaning concepts\n", + "- Familiarity with regular expressions (helpful but not required)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "from datetime import datetime, timedelta\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Display settings\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 100)\n", + "\n", + "print(\"Libraries loaded 
successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Messy Sample Data\n", + "\n", + "Let's create a realistic messy dataset to practice advanced cleaning techniques." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Create intentionally messy data that mimics real-world issues\n", + "np.random.seed(42)\n", + "\n", + "# Base data\n", + "n_records = 200\n", + "messy_data = {\n", + " 'customer_id': [f'CUST{i:04d}' if i % 10 != 0 else f'cust{i:04d}' for i in range(1, n_records + 1)],\n", + " 'customer_name': [\n", + " 'John Smith', 'jane doe', 'MARY JOHNSON', 'bob wilson', 'Sarah Davis',\n", + " 'Mike Brown', 'lisa garcia', 'DAVID MILLER', 'Amy Wilson', 'Tom Anderson'\n", + " ] * 20,\n", + " 'email': [\n", + " 'john.smith@email.com', 'JANE.DOE@EMAIL.COM', 'mary@company.org',\n", + " 'bob..wilson@test.com', 'sarah@invalid-email', 'mike@email.com',\n", + " 'lisa.garcia@email.com', 'david@company.org', 'amy@email.com', 'tom@test.com'\n", + " ] * 20,\n", + " 'phone': [\n", + " '(555) 123-4567', '555.987.6543', '5551234567', '555-987-6543',\n", + " '(555)123-4567', '+1-555-123-4567', '555 123 4567', '5559876543',\n", + " '(555) 987 6543', '555-123-4567'\n", + " ] * 20,\n", + " 'address': [\n", + " '123 Main St, Anytown, NY 12345', '456 Oak Ave, Boston, MA 02101',\n", + " '789 Pine Rd, Los Angeles, CA 90210', '321 Elm St, Chicago, IL 60601',\n", + " '654 Maple Dr, Houston, TX 77001', '987 Cedar Ln, Phoenix, AZ 85001',\n", + " '147 Birch Way, Philadelphia, PA 19101', '258 Ash Ct, San Antonio, TX 78201',\n", + " '369 Walnut St, San Diego, CA 92101', '741 Cherry Ave, Dallas, TX 75201'\n", + " ] * 20,\n", + " 'purchase_amount': np.random.normal(100, 30, n_records).round(2),\n", + " 'purchase_date': [\n", + " '2024-01-15', '01/16/2024', '2024-1-17', '16-01-2024', '2024/01/18',\n", + " 'January 19, 
2024', '2024-01-20', '01-21-24', '2024.01.22', '23/01/2024'\n", + " ] * 20,\n", + " 'category': [\n", + " 'Electronics', 'electronics', 'ELECTRONICS', 'Books', 'books',\n", + " 'Clothing', 'clothing', 'CLOTHING', 'Home & Garden', 'home&garden'\n", + " ] * 20,\n", + " 'satisfaction_score': np.random.choice([1, 2, 3, 4, 5, 99, -1, None], n_records, p=[0.05, 0.1, 0.15, 0.35, 0.3, 0.02, 0.02, 0.01])\n", + "}\n", + "\n", + "# Convert to DataFrame first\n", + "df_messy = pd.DataFrame(messy_data)\n", + "\n", + "# Introduce missing values and anomalies using proper indexing\n", + "df_messy.loc[df_messy.index[::25], 'customer_name'] = None # Some missing names\n", + "df_messy.loc[df_messy.index[::30], 'email'] = None # Some missing emails\n", + "df_messy.loc[df_messy.index[::35], 'purchase_amount'] = np.nan # Some missing amounts\n", + "df_messy.loc[df_messy.index[::40], 'purchase_amount'] = -999 # Invalid negative values\n", + "\n", + "# Add some duplicate records\n", + "duplicate_indices = [0, 1, 2, 3, 4]\n", + "duplicate_rows = df_messy.iloc[duplicate_indices].copy()\n", + "df_messy = pd.concat([df_messy, duplicate_rows], ignore_index=True)\n", + "\n", + "print(\"Messy dataset created:\")\n", + "print(f\"Shape: {df_messy.shape}\")\n", + "print(\"\\nFirst few rows:\")\n", + "print(df_messy.head(10))\n", + "print(\"\\nData types:\")\n", + "print(df_messy.dtypes)\n", + "print(\"\\nSample of data quality issues:\")\n", + "print(\"\\n1. Missing values:\")\n", + "print(df_messy.isnull().sum())\n", + "print(\"\\n2. Inconsistent formatting examples:\")\n", + "print(\"Customer IDs:\", df_messy['customer_id'].head(15).tolist())\n", + "print(\"Customer names:\", df_messy['customer_name'].dropna().head(5).tolist())\n", + "print(\"Categories:\", df_messy['category'].unique()[:5])\n", + "print(\"\\n3. Invalid satisfaction scores:\")\n", + "print(\"Unique satisfaction scores:\", sorted(df_messy['satisfaction_score'].dropna().unique()))\n", + "print(\"\\n4. 
Invalid purchase amounts:\")\n", + "print(\"Negative amounts:\", df_messy[df_messy['purchase_amount'] < 0]['purchase_amount'].count())\n", + "print(\"\\n5. Date format inconsistencies:\")\n", + "print(\"Sample dates:\", df_messy['purchase_date'].head(10).tolist())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Data Quality Assessment\n", + "\n", + "First, let's assess the quality of our messy data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def assess_data_quality(df):\n", + " \"\"\"Comprehensive data quality assessment\"\"\"\n", + " print(\"=== DATA QUALITY ASSESSMENT ===\")\n", + " print(f\"Dataset shape: {df.shape}\")\n", + " print(f\"Total cells: {df.size}\")\n", + " \n", + " # Missing values analysis\n", + " print(\"\\n--- Missing Values ---\")\n", + " missing_stats = pd.DataFrame({\n", + " 'Missing_Count': df.isnull().sum(),\n", + " 'Missing_Percentage': (df.isnull().sum() / len(df)) * 100\n", + " })\n", + " missing_stats = missing_stats[missing_stats['Missing_Count'] > 0]\n", + " print(missing_stats.round(2))\n", + " \n", + " # Duplicate analysis\n", + " print(\"\\n--- Duplicates ---\")\n", + " total_duplicates = df.duplicated().sum()\n", + " print(f\"Complete duplicate rows: {total_duplicates}\")\n", + " \n", + " # Column-specific analysis\n", + " print(\"\\n--- Column Analysis ---\")\n", + " for col in df.columns:\n", + " unique_count = df[col].nunique()\n", + " unique_percentage = (unique_count / len(df)) * 100\n", + " print(f\"{col}: {unique_count} unique values ({unique_percentage:.1f}%)\")\n", + " \n", + " # Data type issues\n", + " print(\"\\n--- Data Types ---\")\n", + " print(df.dtypes)\n", + " \n", + " return missing_stats, total_duplicates\n", + "\n", + "# Assess the messy data\n", + "missing_stats, duplicate_count = assess_data_quality(df_messy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": 
[], + "source": [ + "# Identify specific data quality issues\n", + "def identify_issues(df):\n", + " \"\"\"Identify specific data quality issues\"\"\"\n", + " issues = []\n", + " \n", + " # Check for inconsistent formatting\n", + " print(\"=== SPECIFIC ISSUES IDENTIFIED ===\")\n", + " \n", + " # Customer ID formatting\n", + " id_patterns = df['customer_id'].str.extract(r'(CUST|cust)(\\d+)').fillna('')\n", + " inconsistent_ids = (id_patterns[0] == 'cust').sum()\n", + " print(f\"Inconsistent customer ID format: {inconsistent_ids} records\")\n", + " \n", + " # Email validation\n", + " email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'\n", + " invalid_emails = ~df['email'].str.match(email_pattern, na=False)\n", + " print(f\"Invalid email formats: {invalid_emails.sum()} records\")\n", + " \n", + " # Negative purchase amounts\n", + " negative_amounts = (df['purchase_amount'] < 0).sum()\n", + " print(f\"Negative purchase amounts: {negative_amounts} records\")\n", + " \n", + " # Invalid satisfaction scores\n", + " invalid_scores = ((df['satisfaction_score'] < 1) | (df['satisfaction_score'] > 5)) & df['satisfaction_score'].notna()\n", + " print(f\"Invalid satisfaction scores: {invalid_scores.sum()} records\")\n", + " \n", + " # Category inconsistencies\n", + " category_variations = df['category'].value_counts()\n", + " print(f\"\\nCategory variations: {len(category_variations)} different values\")\n", + " print(category_variations)\n", + " \n", + " return issues\n", + "\n", + "issues = identify_issues(df_messy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Text Data Standardization\n", + "\n", + "Clean and standardize text fields." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Text cleaning functions\n", + "def clean_text_data(df):\n", + " \"\"\"Comprehensive text data cleaning\"\"\"\n", + " df_clean = df.copy()\n", + " \n", + " # Standardize customer names\n", + " print(\"Cleaning customer names...\")\n", + " df_clean['customer_name_clean'] = df_clean['customer_name'].str.strip() # Remove whitespace\n", + " df_clean['customer_name_clean'] = df_clean['customer_name_clean'].str.title() # Title case\n", + " df_clean['customer_name_clean'] = df_clean['customer_name_clean'].str.replace(r'\\s+', ' ', regex=True) # Multiple spaces\n", + " \n", + " # Standardize customer IDs\n", + " print(\"Standardizing customer IDs...\")\n", + " df_clean['customer_id_clean'] = df_clean['customer_id'].str.upper() # All uppercase\n", + " df_clean['customer_id_clean'] = df_clean['customer_id_clean'].str.replace('CUST', 'CUST') # Ensure consistent prefix\n", + " \n", + " # Clean email addresses\n", + " print(\"Cleaning email addresses...\")\n", + " df_clean['email_clean'] = df_clean['email'].str.lower() # Lowercase\n", + " df_clean['email_clean'] = df_clean['email_clean'].str.strip() # Remove whitespace\n", + " df_clean['email_clean'] = df_clean['email_clean'].str.replace(r'\\.{2,}', '.', regex=True) # Multiple dots\n", + " \n", + " # Standardize categories\n", + " print(\"Standardizing categories...\")\n", + " category_mapping = {\n", + " 'electronics': 'Electronics',\n", + " 'ELECTRONICS': 'Electronics',\n", + " 'books': 'Books',\n", + " 'clothing': 'Clothing',\n", + " 'CLOTHING': 'Clothing',\n", + " 'home&garden': 'Home & Garden',\n", + " 'Home & Garden': 'Home & Garden'\n", + " }\n", + " df_clean['category_clean'] = df_clean['category'].map(category_mapping).fillna(df_clean['category'])\n", + " \n", + " return df_clean\n", + "\n", + "# Apply text cleaning\n", + "df_text_clean = clean_text_data(df_messy)\n", + "\n", + "print(\"\\nText cleaning 
comparison:\")\n", + "comparison_cols = ['customer_name', 'customer_name_clean', 'customer_id', 'customer_id_clean', \n", + " 'email', 'email_clean', 'category', 'category_clean']\n", + "print(df_text_clean[comparison_cols].head(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Advanced text cleaning with regex\n", + "def advanced_text_cleaning(df):\n", + " \"\"\"Advanced text cleaning using regular expressions\"\"\"\n", + " df_advanced = df.copy()\n", + " \n", + " # Extract and standardize address components\n", + " print(\"Processing addresses...\")\n", + " # Basic address pattern: number street, city, state zipcode\n", + " address_pattern = r'(\\d+)\\s+([^,]+),\\s*([^,]+),\\s*([A-Z]{2})\\s+(\\d{5})'\n", + " address_parts = df_advanced['address'].str.extract(address_pattern)\n", + " address_parts.columns = ['street_number', 'street_name', 'city', 'state', 'zipcode']\n", + " \n", + " # Clean street names\n", + " address_parts['street_name'] = address_parts['street_name'].str.title()\n", + " address_parts['city'] = address_parts['city'].str.title()\n", + " \n", + " # Combine cleaned parts\n", + " df_advanced['address_clean'] = (\n", + " address_parts['street_number'] + ' ' + address_parts['street_name'] + ', ' +\n", + " address_parts['city'] + ', ' + address_parts['state'] + ' ' + address_parts['zipcode']\n", + " )\n", + " \n", + " # Add individual address components\n", + " for col in address_parts.columns:\n", + " df_advanced[col] = address_parts[col]\n", + " \n", + " return df_advanced\n", + "\n", + "# Apply advanced cleaning\n", + "df_advanced_clean = advanced_text_cleaning(df_text_clean)\n", + "\n", + "print(\"Address cleaning results:\")\n", + "print(df_advanced_clean[['address', 'address_clean', 'city', 'state', 'zipcode']].head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Phone Number Standardization\n", + "\n", + "Clean and standardize phone numbers using regex patterns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def standardize_phone_numbers(df):\n", + " \"\"\"Standardize phone numbers to consistent format\"\"\"\n", + " df_phone = df.copy()\n", + " \n", + " def clean_phone(phone):\n", + " \"\"\"Clean individual phone number\"\"\"\n", + " if pd.isna(phone):\n", + " return None\n", + " \n", + " # Remove all non-digit characters\n", + " digits_only = re.sub(r'\\D', '', str(phone))\n", + " \n", + " # Handle different formats\n", + " if len(digits_only) == 10:\n", + " # Format as (XXX) XXX-XXXX\n", + " return f\"({digits_only[:3]}) {digits_only[3:6]}-{digits_only[6:]}\"\n", + " elif len(digits_only) == 11 and digits_only.startswith('1'):\n", + " # Remove country code and format\n", + " phone_part = digits_only[1:]\n", + " return f\"({phone_part[:3]}) {phone_part[3:6]}-{phone_part[6:]}\"\n", + " else:\n", + " # Invalid phone number\n", + " return 'INVALID'\n", + " \n", + " # Apply phone cleaning\n", + " df_phone['phone_clean'] = df_phone['phone'].apply(clean_phone)\n", + " \n", + " # Extract area code\n", + " df_phone['area_code'] = df_phone['phone_clean'].str.extract(r'\\((\\d{3})\\)')\n", + " \n", + " # Flag invalid phone numbers\n", + " df_phone['phone_is_valid'] = df_phone['phone_clean'] != 'INVALID'\n", + " \n", + " return df_phone\n", + "\n", + "# Apply phone standardization\n", + "df_phone_clean = standardize_phone_numbers(df_advanced_clean)\n", + "\n", + "print(\"Phone number standardization:\")\n", + "print(df_phone_clean[['phone', 'phone_clean', 'area_code', 'phone_is_valid']].head(15))\n", + "\n", + "print(\"\\nPhone validation summary:\")\n", + "print(df_phone_clean['phone_is_valid'].value_counts())\n", + "\n", + "print(\"\\nArea code distribution:\")\n", + "print(df_phone_clean['area_code'].value_counts().head())" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## 4. Date Standardization\n", + "\n", + "Parse and standardize dates from various formats." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def standardize_dates(df):\n", + " \"\"\"Parse and standardize dates from multiple formats\"\"\"\n", + " df_dates = df.copy()\n", + " \n", + " def parse_date(date_str):\n", + " \"\"\"Try to parse date from various formats\"\"\"\n", + " if pd.isna(date_str):\n", + " return None\n", + " \n", + " date_str = str(date_str).strip()\n", + " \n", + " # Common date formats to try\n", + " formats = [\n", + " '%Y-%m-%d', # 2024-01-15\n", + " '%m/%d/%Y', # 01/16/2024\n", + " '%Y-%m-%d', # 2024-1-17 (handled by first format)\n", + " '%d-%m-%Y', # 16-01-2024\n", + " '%Y/%m/%d', # 2024/01/18\n", + " '%B %d, %Y', # January 19, 2024\n", + " '%m-%d-%y', # 01-21-24\n", + " '%Y.%m.%d', # 2024.01.22\n", + " '%d/%m/%Y' # 23/01/2024\n", + " ]\n", + " \n", + " for fmt in formats:\n", + " try:\n", + " return pd.to_datetime(date_str, format=fmt)\n", + " except ValueError:\n", + " continue\n", + " \n", + " # If all else fails, try pandas' flexible parser\n", + " try:\n", + " return pd.to_datetime(date_str, infer_datetime_format=True)\n", + " except:\n", + " return None\n", + " \n", + " # Apply date parsing\n", + " print(\"Parsing dates...\")\n", + " df_dates['purchase_date_clean'] = df_dates['purchase_date'].apply(parse_date)\n", + " \n", + " # Flag unparseable dates\n", + " df_dates['date_is_valid'] = df_dates['purchase_date_clean'].notna()\n", + " \n", + " # Extract date components for valid dates\n", + " df_dates['purchase_year'] = df_dates['purchase_date_clean'].dt.year\n", + " df_dates['purchase_month'] = df_dates['purchase_date_clean'].dt.month\n", + " df_dates['purchase_day'] = df_dates['purchase_date_clean'].dt.day\n", + " df_dates['purchase_day_of_week'] = df_dates['purchase_date_clean'].dt.day_name()\n", + " \n", + " return 
df_dates\n", + "\n", + "# Apply date standardization\n", + "df_date_clean = standardize_dates(df_phone_clean)\n", + "\n", + "print(\"Date standardization results:\")\n", + "print(df_date_clean[['purchase_date', 'purchase_date_clean', 'date_is_valid', \n", + " 'purchase_year', 'purchase_month', 'purchase_day_of_week']].head(15))\n", + "\n", + "print(\"\\nDate parsing summary:\")\n", + "print(df_date_clean['date_is_valid'].value_counts())\n", + "\n", + "invalid_dates = df_date_clean[~df_date_clean['date_is_valid']]['purchase_date'].unique()\n", + "if len(invalid_dates) > 0:\n", + " print(f\"\\nInvalid date formats found: {invalid_dates}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Numerical Data Cleaning\n", + "\n", + "Handle outliers, invalid values, and missing numerical data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def clean_numerical_data(df):\n", + " \"\"\"Clean and validate numerical data\"\"\"\n", + " df_numeric = df.copy()\n", + " \n", + " # Clean purchase amounts\n", + " print(\"Cleaning purchase amounts...\")\n", + " \n", + " # Flag invalid values\n", + " df_numeric['amount_is_valid'] = (\n", + " df_numeric['purchase_amount'].notna() & \n", + " (df_numeric['purchase_amount'] >= 0) & \n", + " (df_numeric['purchase_amount'] <= 10000) # Reasonable upper limit\n", + " )\n", + " \n", + " # Replace invalid values with NaN\n", + " df_numeric['purchase_amount_clean'] = df_numeric['purchase_amount'].where(\n", + " df_numeric['amount_is_valid'], np.nan\n", + " )\n", + " \n", + " # Detect outliers using IQR method\n", + " Q1 = df_numeric['purchase_amount_clean'].quantile(0.25)\n", + " Q3 = df_numeric['purchase_amount_clean'].quantile(0.75)\n", + " IQR = Q3 - Q1\n", + " lower_bound = Q1 - 1.5 * IQR\n", + " upper_bound = Q3 + 1.5 * IQR\n", + " \n", + " df_numeric['amount_is_outlier'] = (\n", + " (df_numeric['purchase_amount_clean'] < lower_bound) |\n", + " 
(df_numeric['purchase_amount_clean'] > upper_bound)\n", + " )\n", + " \n", + " # Clean satisfaction scores\n", + " print(\"Cleaning satisfaction scores...\")\n", + " \n", + " # Valid satisfaction scores are 1-5\n", + " df_numeric['satisfaction_is_valid'] = (\n", + " df_numeric['satisfaction_score'].notna() &\n", + " (df_numeric['satisfaction_score'].between(1, 5))\n", + " )\n", + " \n", + " df_numeric['satisfaction_score_clean'] = df_numeric['satisfaction_score'].where(\n", + " df_numeric['satisfaction_is_valid'], np.nan\n", + " )\n", + " \n", + " return df_numeric\n", + "\n", + "# Apply numerical cleaning\n", + "df_numeric_clean = clean_numerical_data(df_date_clean)\n", + "\n", + "print(\"Numerical data cleaning results:\")\n", + "print(df_numeric_clean[['purchase_amount', 'purchase_amount_clean', 'amount_is_valid', \n", + " 'amount_is_outlier', 'satisfaction_score', 'satisfaction_score_clean', \n", + " 'satisfaction_is_valid']].head(15))\n", + "\n", + "print(\"\\nNumerical data quality summary:\")\n", + "print(f\"Valid purchase amounts: {df_numeric_clean['amount_is_valid'].sum()}/{len(df_numeric_clean)}\")\n", + "print(f\"Outlier amounts: {df_numeric_clean['amount_is_outlier'].sum()}\")\n", + "print(f\"Valid satisfaction scores: {df_numeric_clean['satisfaction_is_valid'].sum()}/{len(df_numeric_clean)}\")\n", + "\n", + "# Show statistics for cleaned data\n", + "print(\"\\nCleaned amount statistics:\")\n", + "print(df_numeric_clean['purchase_amount_clean'].describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Duplicate Detection and Handling\n", + "\n", + "Identify and handle duplicate records intelligently." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def handle_duplicates(df):\n", + " \"\"\"Comprehensive duplicate detection and handling\"\"\"\n", + " df_dedup = df.copy()\n", + " \n", + " print(\"=== DUPLICATE ANALYSIS ===\")\n", + " \n", + " # 1. 
Exact duplicates\n", + " exact_duplicates = df_dedup.duplicated()\n", + " print(f\"Exact duplicate rows: {exact_duplicates.sum()}\")\n", + " \n", + " # 2. Duplicates based on key columns (likely same customer)\n", + " key_cols = ['customer_name_clean', 'email_clean']\n", + " key_duplicates = df_dedup.duplicated(subset=key_cols, keep=False)\n", + " print(f\"Duplicate customers (by name/email): {key_duplicates.sum()}\")\n", + " \n", + " # 3. Near duplicates (similar but not exact)\n", + " # For demonstration, we'll check phone numbers\n", + " phone_duplicates = df_dedup.duplicated(subset=['phone_clean'], keep=False)\n", + " print(f\"Duplicate phone numbers: {phone_duplicates.sum()}\")\n", + " \n", + " # Show duplicate examples\n", + " if key_duplicates.any():\n", + " print(\"\\nExample duplicate customers:\")\n", + " duplicate_customers = df_dedup[key_duplicates].sort_values(key_cols)\n", + " print(duplicate_customers[key_cols + ['customer_id_clean', 'purchase_amount_clean']].head(10))\n", + " \n", + " # Remove exact duplicates\n", + " print(f\"\\nRemoving {exact_duplicates.sum()} exact duplicates...\")\n", + " df_no_exact_dups = df_dedup[~exact_duplicates]\n", + " \n", + " # For customer duplicates, keep the one with the highest purchase amount\n", + " print(\"Handling customer duplicates (keeping highest purchase)...\")\n", + " df_final = df_no_exact_dups.sort_values('purchase_amount_clean', ascending=False).drop_duplicates(\n", + " subset=key_cols, keep='first'\n", + " )\n", + " \n", + " print(f\"Final dataset size after deduplication: {len(df_final)} (was {len(df)})\")\n", + " \n", + " return df_final\n", + "\n", + "# Apply duplicate handling\n", + "df_deduplicated = handle_duplicates(df_numeric_clean)\n", + "\n", + "print(f\"\\nRows removed: {len(df_numeric_clean) - len(df_deduplicated)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. 
Data Validation and Quality Scores\n", + "\n", + "Create comprehensive data quality metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_quality_scores(df):\n", + " \"\"\"Calculate comprehensive data quality scores\"\"\"\n", + " df_quality = df.copy()\n", + " \n", + " # Define quality checks\n", + " quality_checks = {\n", + " 'has_customer_name': df_quality['customer_name_clean'].notna(),\n", + " 'has_valid_email': df_quality['email_clean'].notna() & \n", + " df_quality['email_clean'].str.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$', na=False),\n", + " 'has_valid_phone': df_quality['phone_is_valid'] == True,\n", + " 'has_valid_date': df_quality['date_is_valid'] == True,\n", + " 'has_valid_amount': df_quality['amount_is_valid'] == True,\n", + " 'has_valid_satisfaction': df_quality['satisfaction_is_valid'] == True,\n", + " 'amount_not_outlier': df_quality['amount_is_outlier'] == False,\n", + " 'has_complete_address': df_quality['city'].notna() & df_quality['state'].notna() & df_quality['zipcode'].notna()\n", + " }\n", + " \n", + " # Add individual quality flags\n", + " for check_name, check_result in quality_checks.items():\n", + " df_quality[f'quality_{check_name}'] = check_result.astype(int)\n", + " \n", + " # Calculate overall quality score (percentage of passed checks)\n", + " quality_cols = [col for col in df_quality.columns if col.startswith('quality_')]\n", + " df_quality['data_quality_score'] = df_quality[quality_cols].mean(axis=1) * 100\n", + " \n", + " # Categorize quality levels\n", + " def quality_category(score):\n", + " if score >= 90:\n", + " return 'Excellent'\n", + " elif score >= 75:\n", + " return 'Good'\n", + " elif score >= 50:\n", + " return 'Fair'\n", + " else:\n", + " return 'Poor'\n", + " \n", + " df_quality['quality_category'] = df_quality['data_quality_score'].apply(quality_category)\n", + " \n", + " return df_quality, quality_checks\n", + 
"\n", + "# Calculate quality scores\n", + "df_with_quality, quality_checks = calculate_quality_scores(df_deduplicated)\n", + "\n", + "print(\"Data quality analysis:\")\n", + "print(df_with_quality[['customer_name_clean', 'data_quality_score', 'quality_category']].head(10))\n", + "\n", + "print(\"\\nQuality category distribution:\")\n", + "print(df_with_quality['quality_category'].value_counts())\n", + "\n", + "print(\"\\nAverage quality scores by check:\")\n", + "quality_summary = {}\n", + "for check_name in quality_checks.keys():\n", + " col_name = f'quality_{check_name}'\n", + " quality_summary[check_name] = df_with_quality[col_name].mean() * 100\n", + "\n", + "quality_df = pd.DataFrame(list(quality_summary.items()), columns=['Quality_Check', 'Pass_Rate_%'])\n", + "quality_df = quality_df.sort_values('Pass_Rate_%', ascending=False)\n", + "print(quality_df.round(1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Practice Exercises\n", + "\n", + "Apply advanced data cleaning techniques to challenging scenarios:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 1: Create a custom validation function\n", + "# Build a function that validates business rules:\n", + "# - Email domains should be from approved list\n", + "# - Purchase amounts should be within reasonable ranges by category\n", + "# - Dates should be within business operating period\n", + "# - Customer IDs should follow specific format patterns\n", + "\n", + "def validate_business_rules(df):\n", + " \"\"\"Validate business-specific rules\"\"\"\n", + " # Your implementation here\n", + " pass\n", + "\n", + "# validation_results = validate_business_rules(df_final_clean)\n", + "# print(validation_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 2: Advanced duplicate detection\n", + "# Implement fuzzy matching for 
near-duplicate detection:\n", + "# - Similar names (edit distance)\n", + "# - Similar addresses\n", + "# - Similar email patterns\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Exercise 3: Data cleaning metrics dashboard\n", + "# Create a comprehensive data quality dashboard that shows:\n", + "# - Data quality trends over time\n", + "# - Field-by-field quality scores\n", + "# - Impact of cleaning steps\n", + "# - Recommendations for further improvement\n", + "\n", + "# Your code here:\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Key Takeaways\n", + "\n", + "1. **Assessment First**: Always assess data quality before cleaning\n", + "2. **Systematic Approach**: Use a structured pipeline for consistent results\n", + "3. **Preserve Original Data**: Keep original values while creating cleaned versions\n", + "4. **Document Everything**: Log all cleaning steps and decisions\n", + "5. **Validation**: Implement business rule validation\n", + "6. **Quality Metrics**: Measure and track data quality improvements\n", + "7. **Reusable Pipeline**: Create automated, configurable cleaning processes\n", + "8. 
**Context Matters**: Consider domain-specific requirements\n", + "\n", + "## Common Data Issues and Solutions\n", + "\n", + "| Issue | Detection Method | Solution |\n", + "|-------|-----------------|----------|\n", + "| Inconsistent Format | Pattern analysis | Standardization rules |\n", + "| Missing Values | `.isnull()` | Imputation or flagging |\n", + "| Duplicates | `.duplicated()` | Deduplication logic |\n", + "| Outliers | Statistical methods | Capping or flagging |\n", + "| Invalid Values | Business rules | Validation and correction |\n", + "| Inconsistent Naming | String analysis | Normalization |\n", + "| Date Issues | Parsing attempts | Multiple format handling |\n", + "| Text Issues | Regex patterns | Cleaning and standardization |\n", + "\n", + "## Best Practices\n", + "\n", + "1. **Start with Exploration**: Understand your data before cleaning\n", + "2. **Preserve Traceability**: Keep original and cleaned versions\n", + "3. **Validate Assumptions**: Test cleaning rules on sample data\n", + "4. **Measure Impact**: Quantify improvements from cleaning\n", + "5. **Automate When Possible**: Build reusable cleaning pipelines\n", + "6. **Handle Edge Cases**: Plan for unusual but valid data\n", + "7. **Business Context**: Include domain experts in rule definition\n", + "8. 
**Iterative Process**: Refine cleaning rules based on results\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/ohlcv_analysis.ipynb b/Session_01/ohlcv_analysis.ipynb new file mode 100755 index 0000000..47ef469 --- /dev/null +++ b/Session_01/ohlcv_analysis.ipynb @@ -0,0 +1,1301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# šŸ“Š Working with CSV Data: Bitcoin Price Analysis\n", + "## A Practical Guide to Data Analysis with Python\n", + "\n", + "This notebook demonstrates **what you can do with CSV data** using a real Bitcoin price dataset:\n", + "\n", + "āœ… **Load and explore CSV files** \n", + "āœ… **Clean and prepare data** \n", + "āœ… **Calculate new metrics from existing data** \n", + "āœ… **Create visualizations** \n", + "āœ… **Find patterns and insights** \n", + "āœ… **Export processed data** \n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“¦ Step 1: Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Libraries loaded successfully!\n", + "šŸ“Š Ready to analyze CSV data!\n" + ] + } + ], + "source": [ + "# Essential libraries for data analysis\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from datetime import datetime\n", + "\n", + "# Make plots look better\n", + "plt.style.use('default')\n", + "plt.rcParams['figure.figsize'] = (12, 6)\n", + "\n", + "print(\"āœ… Libraries loaded 
successfully!\")\n", + "print(\"šŸ“Š Ready to analyze CSV data!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“ Step 2: Load CSV Data" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Successfully loaded: Data/BTCUSD-1h-data.csv\n", + "šŸ“Š Dataset loaded with 83954 rows and 6 columns\n" + ] + } + ], + "source": [ + "# šŸ” Load CSV file\n", + "file_path = os.path.join(\"Data\", \"BTCUSD-1h-data.csv\")\n", + "\n", + "try:\n", + " # Read the CSV file\n", + " df = pd.read_csv(file_path)\n", + " print(f\"āœ… Successfully loaded: {file_path}\")\n", + "except FileNotFoundError:\n", + " print(\"āŒ File not found. Creating sample data...\")\n", + " \n", + " # Create sample data for demonstration\n", + " dates = pd.date_range('2023-01-01', '2024-01-01', freq='H')\n", + " np.random.seed(42)\n", + " \n", + " # Simulate Bitcoin prices\n", + " price = 30000 # Starting price\n", + " prices = []\n", + " \n", + " for _ in range(len(dates)):\n", + " # Random price movement\n", + " change = np.random.normal(0, 0.01) # 1% average change\n", + " price = price * (1 + change)\n", + " prices.append(price)\n", + " \n", + " # Create DataFrame\n", + " df = pd.DataFrame({\n", + " 'datetime': dates,\n", + " 'open': prices,\n", + " 'high': [p * 1.01 for p in prices],\n", + " 'low': [p * 0.99 for p in prices],\n", + " 'close': prices,\n", + " 'volume': np.random.uniform(1000, 10000, len(dates))\n", + " })\n", + "\n", + "print(f\"šŸ“Š Dataset loaded with {len(df)} rows and {len(df.columns)} columns\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ‘€ Step 3: Explore Your Data" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“‹ FIRST 5 ROWS:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetimeopenhighlowclosevolume
02015-10-03 01:00:00237.73238.37238.20238.36264.238672
12015-10-03 00:00:00237.87238.32238.31238.25192.314590
22015-10-02 23:00:00237.57238.38237.69238.28227.700515
32015-10-02 22:00:00236.97238.08237.96237.68456.360603
42015-10-02 21:00:00237.76238.10237.83238.03243.156239
\n", + "
" + ], + "text/plain": [ + " datetime open high low close volume\n", + "0 2015-10-03 01:00:00 237.73 238.37 238.20 238.36 264.238672\n", + "1 2015-10-03 00:00:00 237.87 238.32 238.31 238.25 192.314590\n", + "2 2015-10-02 23:00:00 237.57 238.38 237.69 238.28 227.700515\n", + "3 2015-10-02 22:00:00 236.97 238.08 237.96 237.68 456.360603\n", + "4 2015-10-02 21:00:00 237.76 238.10 237.83 238.03 243.156239" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“‹ LAST 5 ROWS:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetimeopenhighlowclosevolume
839492025-04-08 06:00:0079233.1279841.1379841.1379433.75256.821284
839502025-04-08 05:00:0079500.0380288.7780280.8479841.14217.689942
839512025-04-08 04:00:0079827.8680388.7679882.4980280.84263.614127
839522025-04-08 03:00:0079727.7980187.6479907.6379892.73278.713624
839532025-04-08 02:00:0079867.4680849.9880577.1579907.63397.356575
\n", + "
" + ], + "text/plain": [ + " datetime open high low close volume\n", + "83949 2025-04-08 06:00:00 79233.12 79841.13 79841.13 79433.75 256.821284\n", + "83950 2025-04-08 05:00:00 79500.03 80288.77 80280.84 79841.14 217.689942\n", + "83951 2025-04-08 04:00:00 79827.86 80388.76 79882.49 80280.84 263.614127\n", + "83952 2025-04-08 03:00:00 79727.79 80187.64 79907.63 79892.73 278.713624\n", + "83953 2025-04-08 02:00:00 79867.46 80849.98 80577.15 79907.63 397.356575" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š BASIC INFO:\n", + "Shape: (83954, 6) (rows, columns)\n", + "Columns: ['datetime', 'open', 'high', 'low', 'close', 'volume']\n", + "Data types:\n", + "datetime object\n", + "open float64\n", + "high float64\n", + "low float64\n", + "close float64\n", + "volume float64\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# šŸ” First look at the data\n", + "print(\"šŸ“‹ FIRST 5 ROWS:\")\n", + "display(df.head())\n", + "\n", + "print(\"\\nšŸ“‹ LAST 5 ROWS:\")\n", + "display(df.tail())\n", + "\n", + "print(\"\\nšŸ“Š BASIC INFO:\")\n", + "print(f\"Shape: {df.shape} (rows, columns)\")\n", + "print(f\"Columns: {list(df.columns)}\")\n", + "print(f\"Data types:\\n{df.dtypes}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“ˆ SUMMARY STATISTICS:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
openhighlowclosevolume
count83954.00000083954.00000083954.00000083954.00000083954.000000
mean23735.56708123956.84790423849.15871123850.217581603.636305
std25032.66968225246.88935225141.44243325142.195598768.710012
min0.060000226.1200000.060000225.5700000.280000
25%4540.1150004589.0000004570.0275004572.865000205.379431
50%11311.11000011410.58500011360.96500011361.345000374.110380
75%38390.20750038837.44500038621.00500038624.597500720.276871
max107631.150000109358.010000108278.800000108276.43000031505.461253
\n", + "
" + ], + "text/plain": [ + " open high low close \\\n", + "count 83954.000000 83954.000000 83954.000000 83954.000000 \n", + "mean 23735.567081 23956.847904 23849.158711 23850.217581 \n", + "std 25032.669682 25246.889352 25141.442433 25142.195598 \n", + "min 0.060000 226.120000 0.060000 225.570000 \n", + "25% 4540.115000 4589.000000 4570.027500 4572.865000 \n", + "50% 11311.110000 11410.585000 11360.965000 11361.345000 \n", + "75% 38390.207500 38837.445000 38621.005000 38624.597500 \n", + "max 107631.150000 109358.010000 108278.800000 108276.430000 \n", + "\n", + " volume \n", + "count 83954.000000 \n", + "mean 603.636305 \n", + "std 768.710012 \n", + "min 0.280000 \n", + "25% 205.379431 \n", + "50% 374.110380 \n", + "75% 720.276871 \n", + "max 31505.461253 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "ā“ MISSING DATA:\n", + "datetime 0\n", + "open 0\n", + "high 0\n", + "low 0\n", + "close 0\n", + "volume 0\n", + "dtype: int64\n", + "āœ… No missing data found!\n" + ] + } + ], + "source": [ + "# šŸ“Š Quick statistics\n", + "print(\"šŸ“ˆ SUMMARY STATISTICS:\")\n", + "display(df.describe())\n", + "\n", + "# Check for missing data\n", + "print(\"\\nā“ MISSING DATA:\")\n", + "missing_data = df.isnull().sum()\n", + "print(missing_data)\n", + "\n", + "if missing_data.sum() == 0:\n", + " print(\"āœ… No missing data found!\")\n", + "else:\n", + " print(\"āš ļø Found missing data - we'll need to clean this\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧹 Step 4: Clean and Prepare Data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Datetime converted and set as index\n", + "šŸ“… Date range: 2015-09-20 14:00:00 to 2025-04-20 13:00:00\n", + "ā±ļø Duration: 3499 days 23:00:00\n", + "\n", + "🧹 Data cleaning complete:\n", + " Before: 
83,954 rows\n", + " After: 83,954 rows\n", + " Removed: 0 rows\n" + ] + } + ], + "source": [ + "# šŸ”„ Convert datetime and set as index\n", + "df['datetime'] = pd.to_datetime(df['datetime'])\n", + "df.set_index('datetime', inplace=True)\n", + "df.sort_index(inplace=True)\n", + "\n", + "print(\"āœ… Datetime converted and set as index\")\n", + "print(f\"šŸ“… Date range: {df.index.min()} to {df.index.max()}\")\n", + "print(f\"ā±ļø Duration: {df.index.max() - df.index.min()}\")\n", + "\n", + "# 🧹 Basic data cleaning\n", + "original_length = len(df)\n", + "\n", + "# Remove any rows with missing values\n", + "df = df.dropna()\n", + "\n", + "# Remove any impossible values (negative prices)\n", + "df = df[df['close'] > 0]\n", + "\n", + "print(f\"\\n🧹 Data cleaning complete:\")\n", + "print(f\" Before: {original_length:,} rows\")\n", + "print(f\" After: {len(df):,} rows\")\n", + "print(f\" Removed: {original_length - len(df):,} rows\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧮 Step 5: Calculate New Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… New metrics calculated:\n", + " šŸ“Š Total columns now: 16\n", + " šŸ“ˆ New columns: ['sma_10', 'sma_20', 'sma_50', 'volatility', 'signal_above_sma20', 'signal_golden_cross', 'daily_range', 'daily_range_percent']\n" + ] + } + ], + "source": [ + "# šŸ“Š Calculate returns (price changes)\n", + "df['price_change'] = df['close'].diff() # Absolute change\n", + "df['returns'] = df['close'].pct_change() # Percentage change\n", + "df['returns_percent'] = df['returns'] * 100 # As percentage\n", + "\n", + "# šŸ“ˆ Calculate moving averages\n", + "df['sma_10'] = df['close'].rolling(10).mean() # 10-period moving average\n", + "df['sma_20'] = df['close'].rolling(20).mean() # 20-period moving average\n", + "df['sma_50'] = df['close'].rolling(50).mean() # 50-period moving average\n", 
+ "\n", + "# šŸ“Š Calculate volatility (how much price moves)\n", + "df['volatility'] = df['returns'].rolling(20).std()\n", + "\n", + "# šŸŽÆ Create trading signals\n", + "df['signal_above_sma20'] = df['close'] > df['sma_20'] # True when price above 20-day average\n", + "df['signal_golden_cross'] = df['sma_10'] > df['sma_20'] # True when short MA above long MA\n", + "\n", + "# šŸ“Š Price ranges\n", + "df['daily_range'] = df['high'] - df['low']\n", + "df['daily_range_percent'] = (df['daily_range'] / df['close']) * 100\n", + "\n", + "print(\"āœ… New metrics calculated:\")\n", + "print(f\" šŸ“Š Total columns now: {len(df.columns)}\")\n", + "print(f\" šŸ“ˆ New columns: {list(df.columns[-8:])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“‹ DATA WITH NEW CALCULATIONS:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
closereturns_percentsma_10sma_20volatilitysignal_above_sma20
datetime
2015-09-20 14:00:00232.92NaNNaNNaNNaNFalse
2015-09-20 15:00:00233.240.137386NaNNaNNaNFalse
2015-09-20 16:00:00233.730.210084NaNNaNNaNFalse
2015-09-20 17:00:00233.04-0.295212NaNNaNNaNFalse
2015-09-20 18:00:00232.27-0.330415NaNNaNNaNFalse
2015-09-20 19:00:00232.650.163603NaNNaNNaNFalse
2015-09-20 20:00:00231.88-0.330969NaNNaNNaNFalse
2015-09-20 21:00:00232.420.232879NaNNaNNaNFalse
2015-09-20 22:00:00232.39-0.012908NaNNaNNaNFalse
2015-09-20 23:00:00232.24-0.064547232.678NaNNaNFalse
\n", + "
" + ], + "text/plain": [ + " close returns_percent sma_10 sma_20 volatility \\\n", + "datetime \n", + "2015-09-20 14:00:00 232.92 NaN NaN NaN NaN \n", + "2015-09-20 15:00:00 233.24 0.137386 NaN NaN NaN \n", + "2015-09-20 16:00:00 233.73 0.210084 NaN NaN NaN \n", + "2015-09-20 17:00:00 233.04 -0.295212 NaN NaN NaN \n", + "2015-09-20 18:00:00 232.27 -0.330415 NaN NaN NaN \n", + "2015-09-20 19:00:00 232.65 0.163603 NaN NaN NaN \n", + "2015-09-20 20:00:00 231.88 -0.330969 NaN NaN NaN \n", + "2015-09-20 21:00:00 232.42 0.232879 NaN NaN NaN \n", + "2015-09-20 22:00:00 232.39 -0.012908 NaN NaN NaN \n", + "2015-09-20 23:00:00 232.24 -0.064547 232.678 NaN NaN \n", + "\n", + " signal_above_sma20 \n", + "datetime \n", + "2015-09-20 14:00:00 False \n", + "2015-09-20 15:00:00 False \n", + "2015-09-20 16:00:00 False \n", + "2015-09-20 17:00:00 False \n", + "2015-09-20 18:00:00 False \n", + "2015-09-20 19:00:00 False \n", + "2015-09-20 20:00:00 False \n", + "2015-09-20 21:00:00 False \n", + "2015-09-20 22:00:00 False \n", + "2015-09-20 23:00:00 False " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ’” QUICK INSIGHTS:\n", + " šŸ’° Current price: $84,351.47\n", + " šŸ“ˆ Highest price: $108,276.43\n", + " šŸ“‰ Lowest price: $225.57\n", + " šŸ“Š Average daily return: 0.010%\n", + " šŸŽÆ Time above 20-day average: 53.3%\n", + " šŸ“Š Current volatility: 0.0016\n" + ] + } + ], + "source": [ + "# šŸ“Š Let's see our new data\n", + "print(\"šŸ“‹ DATA WITH NEW CALCULATIONS:\")\n", + "display(df[['close', 'returns_percent', 'sma_10', 'sma_20', 'volatility', 'signal_above_sma20']].head(10))\n", + "\n", + "# šŸ“ˆ Quick insights\n", + "print(f\"\\nšŸ’” QUICK INSIGHTS:\")\n", + "print(f\" šŸ’° Current price: ${df['close'].iloc[-1]:,.2f}\")\n", + "print(f\" šŸ“ˆ Highest price: ${df['close'].max():,.2f}\")\n", + "print(f\" šŸ“‰ Lowest price: ${df['close'].min():,.2f}\")\n", + "print(f\" šŸ“Š Average daily 
return: {df['returns_percent'].mean():.3f}%\")\n", + "print(f\" šŸŽÆ Time above 20-day average: {df['signal_above_sma20'].mean()*100:.1f}%\")\n", + "print(f\" šŸ“Š Current volatility: {df['volatility'].iloc[-1]:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“Š Step 6: Create Visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Charts created! This shows the power of visualizing CSV data.\n" + ] + } + ], + "source": [ + "# šŸ“ˆ Plot 1: Price chart with moving averages\n", + "plt.figure(figsize=(15, 8))\n", + "\n", + "plt.subplot(2, 2, 1)\n", + "plt.plot(df.index, df['close'], label='Bitcoin Price', linewidth=1, alpha=0.8)\n", + "plt.plot(df.index, df['sma_10'], label='10-day Average', linewidth=2)\n", + "plt.plot(df.index, df['sma_20'], label='20-day Average', linewidth=2)\n", + "plt.title('Bitcoin Price with Moving Averages')\n", + "plt.ylabel('Price (USD)')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "# šŸ“Š Plot 2: Daily returns\n", + "plt.subplot(2, 2, 2)\n", + "plt.plot(df.index, df['returns_percent'], linewidth=0.8, alpha=0.7)\n", + "plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)\n", + "plt.title('Daily Returns (%)')\n", + "plt.ylabel('Return (%)')\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "# šŸ“ˆ Plot 3: Volume\n", + "plt.subplot(2, 2, 3)\n", + "plt.plot(df.index, df['volume'], linewidth=0.8, alpha=0.7, color='orange')\n", + "plt.title('Trading Volume')\n", + "plt.ylabel('Volume')\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "# šŸ“Š Plot 4: Volatility\n", + "plt.subplot(2, 2, 4)\n", + "plt.plot(df.index, df['volatility'], linewidth=1.5, color='red')\n", + "plt.title('Price Volatility (20-day)')\n", + "plt.ylabel('Volatility')\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"āœ… Charts created! This shows the power of visualizing CSV data.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# šŸ“Š Simple histogram of returns\n", + "plt.figure(figsize=(12, 5))\n", + "\n", + "plt.subplot(1, 2, 1)\n", + "plt.hist(df['returns_percent'].dropna(), bins=50, alpha=0.7, edgecolor='black')\n", + "plt.axvline(df['returns_percent'].mean(), color='red', linestyle='--', \n", + " label=f'Average: {df[\"returns_percent\"].mean():.2f}%')\n", + "plt.title('Distribution of Daily Returns')\n", + "plt.xlabel('Daily Return (%)')\n", + "plt.ylabel('Frequency')\n", + "plt.legend()\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "plt.subplot(1, 2, 2)\n", + "monthly_data = df['close'].resample('ME').last()\n", + "monthly_returns = monthly_data.pct_change() * 100\n", + "plt.bar(range(len(monthly_returns)), monthly_returns, alpha=0.7)\n", + "plt.axhline(y=0, color='black', linestyle='-', alpha=0.5)\n", + "plt.title('Monthly Returns')\n", + "plt.xlabel('Month')\n", + "plt.ylabel('Monthly Return (%)')\n", + "plt.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ” Step 7: Find Patterns and Insights" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ā° HOURLY PATTERNS:\n", + "\n", + "šŸ“Š Best and worst hours for returns:\n", + " 🟢 Best hour: 22:00 (avg return: 0.051%)\n", + " šŸ”“ Worst hour: 3:00 (avg return: -0.018%)\n", + "\n", + "šŸ“Š Trading activity:\n", + " šŸ“ˆ Highest volume: 14:00\n", + " šŸ“‰ Lowest volume: 9:00\n" + ] + } + ], + "source": [ + "# šŸ• Analyze patterns by hour of day\n", + "df['hour'] = df.index.hour\n", + "hourly_avg = df.groupby('hour').agg({\n", + " 'returns_percent': 'mean',\n", + " 'volume': 'mean',\n", + " 'volatility': 'mean'\n", + "})\n", + "\n", + "print(\"ā° HOURLY PATTERNS:\")\n", + "print(\"\\nšŸ“Š Best and worst hours for returns:\")\n", + 
"best_hour = hourly_avg['returns_percent'].idxmax()\n", + "worst_hour = hourly_avg['returns_percent'].idxmin()\n", + "print(f\" 🟢 Best hour: {best_hour}:00 (avg return: {hourly_avg.loc[best_hour, 'returns_percent']:.3f}%)\")\n", + "print(f\" šŸ”“ Worst hour: {worst_hour}:00 (avg return: {hourly_avg.loc[worst_hour, 'returns_percent']:.3f}%)\")\n", + "\n", + "print(f\"\\nšŸ“Š Trading activity:\")\n", + "high_volume_hour = hourly_avg['volume'].idxmax()\n", + "low_volume_hour = hourly_avg['volume'].idxmin()\n", + "print(f\" šŸ“ˆ Highest volume: {high_volume_hour}:00\")\n", + "print(f\" šŸ“‰ Lowest volume: {low_volume_hour}:00\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸŽ¢ BIG MOVES ANALYSIS (>5% daily change):\n", + " šŸ“Š Total big moves: 172\n", + " šŸ“ˆ Biggest gain: 22.04%\n", + " šŸ“‰ Biggest loss: -16.97%\n", + " šŸŽÆ Frequency: 0.2% of all days\n", + "\n", + "šŸ“… Most recent big moves:\n", + " 2023-03-03: -5.37% (Price: $22,162.35)\n", + " 2023-04-26: -6.19% (Price: $27,894.99)\n", + " 2023-08-29: +5.73% (Price: $27,524.88)\n", + " 2023-10-23: +5.27% (Price: $33,320.27)\n", + " 2025-04-09: +5.09% (Price: $82,183.56)\n" + ] + } + ], + "source": [ + "# šŸ“Š Analyze big moves\n", + "big_moves = df[abs(df['returns_percent']) > 5] # Days with >5% moves\n", + "\n", + "print(f\"šŸŽ¢ BIG MOVES ANALYSIS (>5% daily change):\")\n", + "print(f\" šŸ“Š Total big moves: {len(big_moves)}\")\n", + "print(f\" šŸ“ˆ Biggest gain: {df['returns_percent'].max():.2f}%\")\n", + "print(f\" šŸ“‰ Biggest loss: {df['returns_percent'].min():.2f}%\")\n", + "print(f\" šŸŽÆ Frequency: {len(big_moves)/len(df)*100:.1f}% of all days\")\n", + "\n", + "if len(big_moves) > 0:\n", + " print(f\"\\nšŸ“… Most recent big moves:\")\n", + " recent_big_moves = big_moves.tail(5)[['close', 'returns_percent', 'volume']]\n", + " for date, row in recent_big_moves.iterrows():\n", + " print(f\" 
{date.strftime('%Y-%m-%d')}: {row['returns_percent']:+.2f}% (Price: ${row['close']:,.2f})\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸŽÆ TRADING SIGNALS ANALYSIS:\n", + "\n", + "šŸ“Š Price above 20-day average:\n", + " āœ… 53.3% of the time\n", + " āŒ 46.7% below average\n", + "\n", + "šŸ“Š Golden Cross signal (short MA > long MA):\n", + " 🟢 Active 53.2% of the time\n", + "\n", + "šŸ“Š CURRENT STATUS:\n", + " šŸ’° Price: $84,351.47\n", + " šŸ“ˆ 20-day average: $84,907.43\n", + " šŸŽÆ Signal: šŸ”“ Below average\n", + " šŸ“Š Distance from average: -0.65%\n" + ] + } + ], + "source": [ + "# šŸŽÆ Trading signal analysis\n", + "signal_stats = df['signal_above_sma20'].value_counts()\n", + "golden_cross_stats = df['signal_golden_cross'].value_counts()\n", + "\n", + "print(\"šŸŽÆ TRADING SIGNALS ANALYSIS:\")\n", + "print(f\"\\nšŸ“Š Price above 20-day average:\")\n", + "if True in signal_stats.index:\n", + " above_pct = signal_stats[True] / len(df) * 100\n", + " print(f\" āœ… {above_pct:.1f}% of the time\")\n", + "if False in signal_stats.index:\n", + " below_pct = signal_stats[False] / len(df) * 100\n", + " print(f\" āŒ {below_pct:.1f}% below average\")\n", + "\n", + "print(f\"\\nšŸ“Š Golden Cross signal (short MA > long MA):\")\n", + "if True in golden_cross_stats.index:\n", + " golden_pct = golden_cross_stats[True] / len(df) * 100\n", + " print(f\" 🟢 Active {golden_pct:.1f}% of the time\")\n", + "\n", + "# Current status\n", + "current_price = df['close'].iloc[-1]\n", + "current_sma20 = df['sma_20'].iloc[-1]\n", + "current_signal = df['signal_above_sma20'].iloc[-1]\n", + "\n", + "print(f\"\\nšŸ“Š CURRENT STATUS:\")\n", + "print(f\" šŸ’° Price: ${current_price:,.2f}\")\n", + "print(f\" šŸ“ˆ 20-day average: ${current_sma20:,.2f}\")\n", + "print(f\" šŸŽÆ Signal: {'🟢 Above average' if current_signal else 'šŸ”“ Below average'}\")\n", + "print(f\" šŸ“Š 
Distance from average: {((current_price/current_sma20-1)*100):+.2f}%\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ’¾ Step 8: Export Your Results" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Enhanced dataset saved: Data/bitcoin_analysis.csv\n", + " šŸ“Š Contains 83954 rows and 17 columns\n", + "āœ… Summary statistics saved: Data/bitcoin_summary.csv\n", + "āœ… Recent data saved: Data/bitcoin_recent.csv\n", + "\n", + "šŸ“ All files saved to: /Users/alex/Dev/cryptoTraining/Session-01/Data\n" + ] + } + ], + "source": [ + "# šŸ“ Create output folder\n", + "output_folder = \"Data\"\n", + "if not os.path.exists(output_folder):\n", + " os.makedirs(output_folder)\n", + " print(f\"šŸ“ Created folder: {output_folder}\")\n", + "\n", + "# šŸ’¾ Export enhanced dataset\n", + "enhanced_file = os.path.join(output_folder, \"bitcoin_analysis.csv\")\n", + "df.to_csv(enhanced_file)\n", + "print(f\"āœ… Enhanced dataset saved: {enhanced_file}\")\n", + "print(f\" šŸ“Š Contains {len(df)} rows and {len(df.columns)} columns\")\n", + "\n", + "# šŸ’¾ Export summary statistics\n", + "summary_file = os.path.join(output_folder, \"bitcoin_summary.csv\")\n", + "summary_stats = df[['close', 'returns_percent', 'sma_20', 'volatility']].describe()\n", + "summary_stats.to_csv(summary_file)\n", + "print(f\"āœ… Summary statistics saved: {summary_file}\")\n", + "\n", + "# šŸ’¾ Export recent data only (last 100 rows)\n", + "recent_file = os.path.join(output_folder, \"bitcoin_recent.csv\")\n", + "df.tail(100).to_csv(recent_file)\n", + "print(f\"āœ… Recent data saved: {recent_file}\")\n", + "\n", + "print(f\"\\nšŸ“ All files saved to: {os.path.abspath(output_folder)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“‹ Step 9: Summary Report" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": 
{}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“‹ FINAL ANALYSIS SUMMARY\n", + "==================================================\n", + "šŸ“ DATA PROCESSED:\n", + " šŸ“Š Total records: 83,954\n", + " šŸ“… Date range: 2015-09-20 to 2025-04-20\n", + " ā±ļø Duration: 3499 days\n", + " šŸ“ˆ Features created: 17\n", + "\n", + "šŸ’° PRICE ANALYSIS:\n", + " šŸ’µ Current price: $84,351.47\n", + " šŸ“ˆ Highest price: $108,276.43\n", + " šŸ“‰ Lowest price: $225.57\n", + " šŸ“Š Total return: +36114.78%\n", + "\n", + "šŸ“Š VOLATILITY & PATTERNS:\n", + " šŸ“Š Average daily return: 0.010%\n", + " šŸ“ˆ Best day: +22.04%\n", + " šŸ“‰ Worst day: -16.97%\n", + " šŸŽÆ Days above 20-day average: 53.3%\n", + "\n", + "šŸ’¾ FILES CREATED:\n", + " šŸ“ Enhanced dataset: bitcoin_analysis_enhanced.csv\n", + " šŸ“Š Summary statistics: bitcoin_summary.csv\n", + " šŸ“… Recent data: bitcoin_recent.csv\n", + "\n", + "šŸŽ‰ WHAT WE ACCOMPLISHED:\n", + " āœ… Loaded and cleaned CSV data\n", + " āœ… Created 11 new calculated columns\n", + " āœ… Generated 4 visualizations\n", + " āœ… Found patterns in the data\n", + " āœ… Exported enhanced datasets\n", + "\n", + "šŸš€ This demonstrates the power of CSV data analysis!\n" + ] + } + ], + "source": [ + "# šŸ“Š Create a final summary\n", + "print(\"šŸ“‹ FINAL ANALYSIS SUMMARY\")\n", + "print(\"=\" * 50)\n", + "\n", + "print(f\"šŸ“ DATA PROCESSED:\")\n", + "print(f\" šŸ“Š Total records: {len(df):,}\")\n", + "print(f\" šŸ“… Date range: {df.index.min().strftime('%Y-%m-%d')} to {df.index.max().strftime('%Y-%m-%d')}\")\n", + "print(f\" ā±ļø Duration: {(df.index.max() - df.index.min()).days} days\")\n", + "print(f\" šŸ“ˆ Features created: {len(df.columns)}\")\n", + "\n", + "print(f\"\\nšŸ’° PRICE ANALYSIS:\")\n", + "print(f\" šŸ’µ Current price: ${df['close'].iloc[-1]:,.2f}\")\n", + "print(f\" šŸ“ˆ Highest price: ${df['close'].max():,.2f}\")\n", + "print(f\" šŸ“‰ Lowest price: ${df['close'].min():,.2f}\")\n", + 
"print(f\" šŸ“Š Total return: {((df['close'].iloc[-1] / df['close'].iloc[0] - 1) * 100):+.2f}%\")\n", + "\n", + "print(f\"\\nšŸ“Š VOLATILITY & PATTERNS:\")\n", + "print(f\" šŸ“Š Average daily return: {df['returns_percent'].mean():.3f}%\")\n", + "print(f\" šŸ“ˆ Best day: {df['returns_percent'].max():+.2f}%\")\n", + "print(f\" šŸ“‰ Worst day: {df['returns_percent'].min():+.2f}%\")\n", + "print(f\" šŸŽÆ Days above 20-day average: {df['signal_above_sma20'].mean()*100:.1f}%\")\n", + "\n", + "print(f\"\\nšŸ’¾ FILES CREATED:\")\n", + "print(f\" šŸ“ Enhanced dataset: bitcoin_analysis_enhanced.csv\")\n", + "print(f\" šŸ“Š Summary statistics: bitcoin_summary.csv\")\n", + "print(f\" šŸ“… Recent data: bitcoin_recent.csv\")\n", + "\n", + "print(f\"\\nšŸŽ‰ WHAT WE ACCOMPLISHED:\")\n", + "print(f\" āœ… Loaded and cleaned CSV data\")\n", + "print(f\" āœ… Created {len(df.columns) - 6} new calculated columns\")\n", + "print(f\" āœ… Generated {4} visualizations\")\n", + "print(f\" āœ… Found patterns in the data\")\n", + "print(f\" āœ… Exported enhanced datasets\")\n", + "print(f\"\\nšŸš€ This demonstrates the power of CSV data analysis!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸŽ“ Learning Summary: What You Can Do with CSV Data\n", + "\n", + "### šŸ“Š **Data Operations We Performed:**\n", + "1. **Load CSV files** with `pd.read_csv()`\n", + "2. **Explore data** with `.head()`, `.describe()`, `.info()`\n", + "3. **Clean data** by removing missing values and invalid entries\n", + "4. **Transform data** by converting dates and setting indexes\n", + "5. **Calculate new metrics** from existing columns\n", + "6. **Create visualizations** to understand patterns\n", + "7. **Find insights** through grouping and analysis\n", + "8. 
**Export results** to new CSV files\n", + "\n", + "### 🔧 **Key Skills Demonstrated:**\n", + "- **Data Loading**: Reading CSV files into pandas DataFrames\n", + "- **Data Cleaning**: Handling missing values and data quality issues\n", + "- **Feature Engineering**: Creating new columns from existing data\n", + "- **Time Series Analysis**: Working with datetime indexes\n", + "- **Statistical Analysis**: Calculating means, rolling averages, and patterns\n", + "- **Data Visualization**: Creating charts to explore data\n", + "- **Pattern Recognition**: Finding trends and anomalies\n", + "- **Data Export**: Saving processed data for future use\n", + "\n", + "### 💡 **Real-World Applications:**\n", + "- **Business Analytics**: Sales data, customer behavior, inventory analysis\n", + "- **Financial Analysis**: Stock prices, trading volumes, risk metrics\n", + "- **Marketing**: Campaign performance, customer segmentation\n", + "- **Operations**: Process optimization, quality control\n", + "- **Research**: Scientific data analysis, survey results\n", + "\n", + "---\n", + "\n", + "**Remember**: CSV files are everywhere!
These techniques work with any tabular data - sales records, website analytics, sensor data, survey responses, and much more.\n", + "\n", + "*This example used Bitcoin data, but the same principles apply to ANY CSV dataset.*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Session_01/ohlcv_analysis_advanced.ipynb b/Session_01/ohlcv_analysis_advanced.ipynb new file mode 100755 index 0000000..8dc4e2c --- /dev/null +++ b/Session_01/ohlcv_analysis_advanced.ipynb @@ -0,0 +1,1733 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# šŸš€ Advanced Price Analysis\n", + "## Technical Analysis, Risk Metrics, and Market Insights\n", + "\n", + "This notebook provides an enhanced analysis of Bitcoin (BTCUSD) hourly price data, including:\n", + "- **Data preprocessing and quality assessment**\n", + "- **Basic technical indicators (moving averages)**\n", + "- **Risk analysis and performance metrics**\n", + "- **Statistical analysis and seasonality**\n", + "- **Monte Carlo simulation for forecasting**\n", + "- **Comprehensive visualizations**\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“¦ Import Libraries and Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… All libraries imported successfully!\n" + ] + } + ], + "source": [ + "# Data manipulation and analysis\n", + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", 
+ "\n", + "# Visualization\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Statistical analysis\n", + "from scipy import stats\n", + "import statsmodels.api as sm\n", + "\n", + "# Set style\n", + "plt.style.use('seaborn-v0_8')\n", + "sns.set_palette(\"husl\")\n", + "\n", + "# Display options\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.width', None)\n", + "\n", + "print(\"āœ… All libraries imported successfully!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“Š Data Loading and Initial Exploration" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Successfully loaded data from: Data/BTCUSD-1h-data.csv\n", + "šŸ“ Dataset shape: (83954, 6)\n" + ] + } + ], + "source": [ + "# 1ļøāƒ£ Load CSV from 'Data/' folder\n", + "file_path = os.path.join(\"Data\", \"BTCUSD-1h-data.csv\")\n", + "\n", + "try:\n", + " df = pd.read_csv(file_path)\n", + " print(f\"āœ… Successfully loaded data from: {file_path}\")\n", + " print(f\"šŸ“ Dataset shape: {df.shape}\")\n", + "except FileNotFoundError:\n", + " print(f\"āŒ File not found: {file_path}\")\n", + " print(\"Creating sample data for demonstration...\")\n", + " \n", + " # Create sample Bitcoin data\n", + " date_range = pd.date_range(start='2020-01-01', end='2024-12-31', freq='H')\n", + " np.random.seed(42)\n", + " \n", + " # Simulate realistic Bitcoin price movements\n", + " returns = np.random.normal(0.0002, 0.02, len(date_range))\n", + " price = 7000 # Starting price\n", + " prices = []\n", + " \n", + " for ret in returns:\n", + " price *= (1 + ret)\n", + " prices.append(price)\n", + " \n", + " # Create OHLCV data\n", + " df = pd.DataFrame({\n", + " 'timestamp': date_range,\n", + " 'open': prices,\n", + " 'high': [p * (1 + abs(np.random.normal(0, 0.01))) for p in prices],\n", + " 'low': [p * (1 - 
abs(np.random.normal(0, 0.01))) for p in prices],\n", + " 'close': prices,\n", + " 'volume': np.random.lognormal(15, 1, len(date_range))\n", + " })\n", + " \n", + " # Ensure OHLC relationships are correct\n", + " df['high'] = df[['open', 'high', 'close']].max(axis=1)\n", + " df['low'] = df[['open', 'low', 'close']].min(axis=1)\n", + " \n", + " print(f\"šŸ“Š Created sample dataset with {len(df)} records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“Š DATASET OVERVIEW\n", + "==================================================\n", + "šŸ“… Date range: 2015-09-20 14:00:00 to 2025-04-20 13:00:00\n", + "ā±ļø Duration: 3499 days 23:00:00\n", + "šŸ“ Total records: 83,954\n", + "šŸ“Š Columns: ['open', 'high', 'low', 'close', 'volume']\n", + "\n", + "šŸ“Š First few rows:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
openhighlowclosevolume
datetime
2015-09-20 14:00:00232.02232.94232.04232.92105.562700
2015-09-20 15:00:00232.77233.61232.86233.2499.734100
2015-09-20 16:00:00233.00233.73233.17233.7391.036000
2015-09-20 17:00:00233.00234.00233.65233.04322.799438
2015-09-20 18:00:00232.25233.54233.05232.27213.313320
\n", + "
" + ], + "text/plain": [ + " open high low close volume\n", + "datetime \n", + "2015-09-20 14:00:00 232.02 232.94 232.04 232.92 105.562700\n", + "2015-09-20 15:00:00 232.77 233.61 232.86 233.24 99.734100\n", + "2015-09-20 16:00:00 233.00 233.73 233.17 233.73 91.036000\n", + "2015-09-20 17:00:00 233.00 234.00 233.65 233.04 322.799438\n", + "2015-09-20 18:00:00 232.25 233.54 233.05 232.27 213.313320" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“ˆ Descriptive statistics:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
openhighlowclosevolume
count83954.00000083954.00000083954.00000083954.00000083954.000000
mean23735.56708123956.84790423849.15871123850.217581603.636305
std25032.66968225246.88935225141.44243325142.195598768.710012
min0.060000226.1200000.060000225.5700000.280000
25%4540.1150004589.0000004570.0275004572.865000205.379431
50%11311.11000011410.58500011360.96500011361.345000374.110380
75%38390.20750038837.44500038621.00500038624.597500720.276871
max107631.150000109358.010000108278.800000108276.43000031505.461253
\n", + "
" + ], + "text/plain": [ + " open high low close \\\n", + "count 83954.000000 83954.000000 83954.000000 83954.000000 \n", + "mean 23735.567081 23956.847904 23849.158711 23850.217581 \n", + "std 25032.669682 25246.889352 25141.442433 25142.195598 \n", + "min 0.060000 226.120000 0.060000 225.570000 \n", + "25% 4540.115000 4589.000000 4570.027500 4572.865000 \n", + "50% 11311.110000 11410.585000 11360.965000 11361.345000 \n", + "75% 38390.207500 38837.445000 38621.005000 38624.597500 \n", + "max 107631.150000 109358.010000 108278.800000 108276.430000 \n", + "\n", + " volume \n", + "count 83954.000000 \n", + "mean 603.636305 \n", + "std 768.710012 \n", + "min 0.280000 \n", + "25% 205.379431 \n", + "50% 374.110380 \n", + "75% 720.276871 \n", + "max 31505.461253 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 2ļøāƒ£ Convert datetime & set index\n", + "df['datetime'] = pd.to_datetime(df['datetime'])\n", + "df.set_index('datetime', inplace=True)\n", + "df.sort_index(inplace=True)\n", + "\n", + "# 3ļøāƒ£ Basic inspection\n", + "print(\"šŸ“Š DATASET OVERVIEW\")\n", + "print(\"=\" * 50)\n", + "print(f\"šŸ“… Date range: {df.index.min()} to {df.index.max()}\")\n", + "print(f\"ā±ļø Duration: {df.index.max() - df.index.min()}\")\n", + "print(f\"šŸ“ Total records: {len(df):,}\")\n", + "print(f\"šŸ“Š Columns: {list(df.columns)}\")\n", + "\n", + "print(\"\\nšŸ“Š First few rows:\")\n", + "display(df.head())\n", + "\n", + "print(\"\\nšŸ“ˆ Descriptive statistics:\")\n", + "display(df.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🧹 Data Quality Assessment and Cleaning" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ” DATA QUALITY ASSESSMENT\n", + "==================================================\n", + "ā— Missing values analysis:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Missing CountMissing %
open00.0
high00.0
low00.0
close00.0
volume00.0
\n", + "
" + ], + "text/plain": [ + " Missing Count Missing %\n", + "open 0 0.0\n", + "high 0 0.0\n", + "low 0 0.0\n", + "close 0 0.0\n", + "volume 0 0.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ”„ Duplicate timestamps: 0\n", + "\n", + "āš ļø Zero/negative prices:\n", + "open 0\n", + "high 0\n", + "low 0\n", + "close 0\n", + "dtype: int64\n", + "\n", + "🧹 CLEANING DATA...\n", + "āœ… Cleaned dataset: 83,954 → 83,954 records\n", + "šŸ“‰ Removed: 0 records (0.00%)\n" + ] + } + ], + "source": [ + "# 4ļøāƒ£ Data quality checks\n", + "print(\"šŸ” DATA QUALITY ASSESSMENT\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Check for missing values\n", + "missing_counts = df.isnull().sum()\n", + "missing_pct = (missing_counts / len(df)) * 100\n", + "\n", + "missing_df = pd.DataFrame({\n", + " 'Missing Count': missing_counts,\n", + " 'Missing %': missing_pct\n", + "})\n", + "\n", + "print(\"ā— Missing values analysis:\")\n", + "display(missing_df)\n", + "\n", + "# Check for duplicates\n", + "duplicates = df.index.duplicated().sum()\n", + "print(f\"\\nšŸ”„ Duplicate timestamps: {duplicates}\")\n", + "\n", + "# Check for zero/negative prices\n", + "zero_prices = (df[['open', 'high', 'low', 'close']] <= 0).sum()\n", + "print(f\"\\nāš ļø Zero/negative prices:\")\n", + "print(zero_prices)\n", + "\n", + "# Data cleaning\n", + "print(\"\\n🧹 CLEANING DATA...\")\n", + "original_length = len(df)\n", + "\n", + "# Remove duplicates and missing values\n", + "df = df[~df.index.duplicated(keep='first')]\n", + "df = df.dropna()\n", + "df = df[(df[['open', 'high', 'low', 'close']] > 0).all(axis=1)]\n", + "\n", + "print(f\"āœ… Cleaned dataset: {original_length:,} → {len(df):,} records\")\n", + "print(f\"šŸ“‰ Removed: {original_length - len(df):,} records ({((original_length - len(df))/original_length)*100:.2f}%)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“ˆ Price 
Metrics and Returns Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“Š CALCULATING RETURN METRICS\n", + "==================================================\n", + "āœ… Return metrics calculated\n", + "\n", + "šŸ“Š RETURNS STATISTICS\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MeanStdSkewnessKurtosis
1H Returns0.0001010.0078260.03307935.938209
24H Returns0.0023700.0368050.0839736.813458
7D Returns0.0166950.0989740.4266662.811554
\n", + "
" + ], + "text/plain": [ + " Mean Std Skewness Kurtosis\n", + "1H Returns 0.000101 0.007826 0.033079 35.938209\n", + "24H Returns 0.002370 0.036805 0.083973 6.813458\n", + "7D Returns 0.016695 0.098974 0.426666 2.811554" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“ˆ ANNUALIZED METRICS\n", + "Annual Return: 88.33%\n", + "Annual Volatility: 73.25%\n", + "Sharpe Ratio: 1.206\n" + ] + } + ], + "source": [ + "# 5ļøāƒ£ Calculate return metrics\n", + "print(\"šŸ“Š CALCULATING RETURN METRICS\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Basic returns\n", + "df['returns'] = df['close'].pct_change()\n", + "df['log_returns'] = np.log(df['close'] / df['close'].shift(1))\n", + "\n", + "# Multi-period returns\n", + "df['returns_24h'] = df['close'].pct_change(24)\n", + "df['returns_7d'] = df['close'].pct_change(24*7)\n", + "\n", + "# Cumulative returns\n", + "df['cumulative_returns'] = (1 + df['returns']).cumprod() - 1\n", + "\n", + "# Price range metrics\n", + "df['hl_range_pct'] = ((df['high'] - df['low']) / df['close']) * 100\n", + "df['oc_change_pct'] = ((df['close'] - df['open']) / df['open']) * 100\n", + "\n", + "print(\"āœ… Return metrics calculated\")\n", + "\n", + "# Returns statistics\n", + "returns_stats = pd.DataFrame({\n", + " 'Mean': [df['returns'].mean(), df['returns_24h'].mean(), df['returns_7d'].mean()],\n", + " 'Std': [df['returns'].std(), df['returns_24h'].std(), df['returns_7d'].std()],\n", + " 'Skewness': [df['returns'].skew(), df['returns_24h'].skew(), df['returns_7d'].skew()],\n", + " 'Kurtosis': [df['returns'].kurtosis(), df['returns_24h'].kurtosis(), df['returns_7d'].kurtosis()]\n", + "}, index=['1H Returns', '24H Returns', '7D Returns'])\n", + "\n", + "print(\"\\nšŸ“Š RETURNS STATISTICS\")\n", + "display(returns_stats.round(6))\n", + "\n", + "# Annualized metrics\n", + "hours_per_year = 24 * 365\n", + "annual_return = df['returns'].mean() * hours_per_year\n", 
+ "annual_volatility = df['returns'].std() * np.sqrt(hours_per_year)\n", + "sharpe_ratio = annual_return / annual_volatility if annual_volatility != 0 else 0\n", + "\n", + "print(f\"\\nšŸ“ˆ ANNUALIZED METRICS\")\n", + "print(f\"Annual Return: {annual_return:.2%}\")\n", + "print(f\"Annual Volatility: {annual_volatility:.2%}\")\n", + "print(f\"Sharpe Ratio: {sharpe_ratio:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ”§ Technical Indicators (Moving Averages Only)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ”§ CALCULATING TECHNICAL INDICATORS\n", + "==================================================\n", + "āœ… Technical indicators calculated\n", + "šŸ“Š Total columns: 31\n", + "\n", + "šŸ“Š SIGNAL STATISTICS\n", + "Price above SMA 10: 53.0% of time\n", + "Price above SMA 20: 53.3% of time\n", + "Golden Cross active: 53.2% of time\n" + ] + } + ], + "source": [ + "# 6ļøāƒ£ Simple moving averages and basic technical indicators\n", + "print(\"šŸ”§ CALCULATING TECHNICAL INDICATORS\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Simple Moving Averages\n", + "sma_periods = [5, 10, 20, 50, 100]\n", + "for period in sma_periods:\n", + " df[f'sma_{period}'] = df['close'].rolling(period).mean()\n", + "\n", + "# Exponential Moving Averages\n", + "ema_periods = [5, 10, 20, 50]\n", + "for period in ema_periods:\n", + " df[f'ema_{period}'] = df['close'].ewm(span=period).mean()\n", + "\n", + "# Rolling volatility\n", + "volatility_periods = [10, 20, 50]\n", + "for period in volatility_periods:\n", + " df[f'volatility_{period}'] = df['returns'].rolling(period).std()\n", + "\n", + "# Volume moving averages\n", + "df['volume_sma_20'] = df['volume'].rolling(20).mean()\n", + "df['volume_ratio'] = df['volume'] / df['volume_sma_20']\n", + "\n", + "# Price position indicators\n", + "for period in [20, 50]:\n", + " 
df[f'price_position_{period}'] = (\n", + " (df['close'] - df['close'].rolling(period).min()) / \n", + " (df['close'].rolling(period).max() - df['close'].rolling(period).min())\n", + " )\n", + "\n", + "# Basic trading signals\n", + "df['above_sma_10'] = df['close'] > df['sma_10']\n", + "df['above_sma_20'] = df['close'] > df['sma_20']\n", + "df['golden_cross'] = df['sma_10'] > df['sma_20']\n", + "\n", + "print(\"āœ… Technical indicators calculated\")\n", + "print(f\"šŸ“Š Total columns: {len(df.columns)}\")\n", + "\n", + "# Signal statistics\n", + "above_sma10_pct = df['above_sma_10'].mean() * 100\n", + "above_sma20_pct = df['above_sma_20'].mean() * 100\n", + "golden_cross_pct = df['golden_cross'].mean() * 100\n", + "\n", + "print(f\"\\nšŸ“Š SIGNAL STATISTICS\")\n", + "print(f\"Price above SMA 10: {above_sma10_pct:.1f}% of time\")\n", + "print(f\"Price above SMA 20: {above_sma20_pct:.1f}% of time\")\n", + "print(f\"Golden Cross active: {golden_cross_pct:.1f}% of time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“Š Basic Visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“Š CREATING VISUALIZATIONS\n", + "==================================================\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 7ļøāƒ£ Create visualizations\n", + "print(\"šŸ“Š CREATING VISUALIZATIONS\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Main price chart with moving averages\n", + "fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 12))\n", + "\n", + "# Plot 1: Price with moving averages\n", + "ax1.plot(df.index, df['close'], label='Close Price', alpha=0.7, linewidth=1)\n", + "ax1.plot(df.index, df['sma_10'], label='SMA 10', linestyle='--', alpha=0.8)\n", + "ax1.plot(df.index, df['sma_20'], label='SMA 20', linestyle='--', alpha=0.8)\n", + "ax1.plot(df.index, df['sma_50'], label='SMA 50', linestyle='--', alpha=0.8)\n", + "ax1.set_title('Bitcoin Price with Moving Averages', fontsize=14, fontweight='bold')\n", + "ax1.set_ylabel('Price (USD)')\n", + "ax1.legend()\n", + "ax1.grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: Returns distribution\n", + "returns_clean = df['returns'].dropna()\n", + "ax2.hist(returns_clean, bins=100, alpha=0.7, edgecolor='black')\n", + "ax2.axvline(returns_clean.mean(), color='red', linestyle='--', label=f'Mean: {returns_clean.mean():.4f}')\n", + "ax2.axvline(returns_clean.median(), color='green', linestyle='--', label=f'Median: {returns_clean.median():.4f}')\n", + "ax2.set_title('Hourly Returns Distribution', fontsize=14, fontweight='bold')\n", + "ax2.set_xlabel('Returns')\n", + "ax2.set_ylabel('Frequency')\n", + "ax2.legend()\n", + "ax2.grid(True, alpha=0.3)\n", + "\n", + "# Plot 3: Volatility over time\n", + "ax3.plot(df.index, df['volatility_10'] * 100, label='10H Volatility', alpha=0.8)\n", + "ax3.plot(df.index, df['volatility_20'] * 100, label='20H Volatility', alpha=0.8)\n", + "ax3.plot(df.index, df['volatility_50'] * 100, label='50H Volatility', alpha=0.8)\n", + "ax3.set_title('Rolling Volatility (%)', fontsize=14, fontweight='bold')\n", + "ax3.set_ylabel('Volatility (%)')\n", + "ax3.legend()\n", + "ax3.grid(True, alpha=0.3)\n", + "\n", + "# Plot 4: 
Volume analysis\n", + "ax4.plot(df.index, df['volume'], alpha=0.5, linewidth=0.5, color='blue', label='Volume')\n", + "ax4.plot(df.index, df['volume_sma_20'], color='red', label='Volume SMA 20', linewidth=2)\n", + "ax4.set_title('Trading Volume', fontsize=14, fontweight='bold')\n", + "ax4.set_ylabel('Volume')\n", + "ax4.legend()\n", + "ax4.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Additional analysis plots\n", + "fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 12))\n", + "\n", + "# Plot 1: Price position in range\n", + "ax1.plot(df.index, df['price_position_20'] * 100, label='20H Price Position', alpha=0.8)\n", + "ax1.plot(df.index, df['price_position_50'] * 100, label='50H Price Position', alpha=0.8)\n", + "ax1.axhline(y=80, color='red', linestyle='--', alpha=0.7, label='Overbought (80%)')\n", + "ax1.axhline(y=20, color='green', linestyle='--', alpha=0.7, label='Oversold (20%)')\n", + "ax1.set_title('Price Position in Range (%)', fontsize=14, fontweight='bold')\n", + "ax1.set_ylabel('Position (%)')\n", + "ax1.set_ylim(0, 100)\n", + "ax1.legend()\n", + "ax1.grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: SMA vs EMA comparison\n", + "ax2.plot(df.index, df['sma_20'], label='SMA 20', alpha=0.8)\n", + "ax2.plot(df.index, df['ema_20'], label='EMA 20', alpha=0.8)\n", + "ax2.plot(df.index, df['close'], label='Close Price', alpha=0.5, linewidth=0.5)\n", + "ax2.set_title('SMA vs EMA Comparison', fontsize=14, fontweight='bold')\n", + "ax2.set_ylabel('Price (USD)')\n", + "ax2.legend()\n", + "ax2.grid(True, alpha=0.3)\n", + "\n", + "# Plot 3: Cumulative returns\n", + "ax3.plot(df.index, df['cumulative_returns'] * 100, linewidth=2, label='Cumulative Returns')\n", + "ax3.axhline(y=0, color='black', linestyle='-', alpha=0.3)\n", + "ax3.fill_between(df.index, 0, df['cumulative_returns'] * 100, alpha=0.3)\n", + "ax3.set_title('Cumulative Returns (%)', fontsize=14, fontweight='bold')\n", + "ax3.set_ylabel('Cumulative Returns (%)')\n", + "ax3.legend()\n", + "ax3.grid(True, alpha=0.3)\n", + "\n", + "# Plot 4: Volume ratio (volume vs average)\n", + "ax4.plot(df.index, df['volume_ratio'], alpha=0.7, linewidth=1)\n", + "ax4.axhline(y=1, color='black', linestyle='-', alpha=0.5, label='Average Volume')\n", + "ax4.axhline(y=2, color='red', linestyle='--', 
alpha=0.7, label='2x Average')\n", + "ax4.axhline(y=0.5, color='green', linestyle='--', alpha=0.7, label='0.5x Average')\n", + "ax4.set_title('Volume Ratio (vs 20H Average)', fontsize=14, fontweight='bold')\n", + "ax4.set_ylabel('Volume Ratio')\n", + "ax4.legend()\n", + "ax4.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸŽÆ Risk Analysis and Value at Risk" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸŽÆ RISK ANALYSIS\n", + "==================================================\n", + "šŸ“Š VALUE AT RISK (VaR) - Hourly Returns (%)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
historicalparametric
95% Confidence-1.0340-1.2772
99% Confidence-2.3843-1.8106
\n", + "
" + ], + "text/plain": [ + " historical parametric\n", + "95% Confidence -1.0340 -1.2772\n", + "99% Confidence -2.3843 -1.8106" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š RISK-ADJUSTED PERFORMANCE METRICS\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetricValue
0Sharpe Ratio1.2059
1Sortino Ratio1.1874
2Max Drawdown (%)-84.1799
3Annual Volatility (%)73.2498
4Downside Deviation (%)74.3929
\n", + "
" + ], + "text/plain": [ + " Metric Value\n", + "0 Sharpe Ratio 1.2059\n", + "1 Sortino Ratio 1.1874\n", + "2 Max Drawdown (%) -84.1799\n", + "3 Annual Volatility (%) 73.2498\n", + "4 Downside Deviation (%) 74.3929" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š CURRENT RISK STATE\n", + "Current 20H Volatility: 0.0016\n", + "Volatility Percentile: 6.3%\n", + "Risk Assessment: Low\n" + ] + } + ], + "source": [ + "# 8ļøāƒ£ Risk analysis\n", + "print(\"šŸŽÆ RISK ANALYSIS\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Value at Risk (VaR) calculations\n", + "confidence_levels = [0.95, 0.99]\n", + "var_results = {}\n", + "\n", + "for conf in confidence_levels:\n", + " # Historical VaR\n", + " var_hist = np.percentile(returns_clean, (1 - conf) * 100)\n", + " \n", + " # Parametric VaR (assuming normal distribution)\n", + " mean_ret = returns_clean.mean()\n", + " std_ret = returns_clean.std()\n", + " var_param = stats.norm.ppf(1 - conf, mean_ret, std_ret)\n", + " \n", + " var_results[conf] = {\n", + " 'historical': var_hist,\n", + " 'parametric': var_param\n", + " }\n", + "\n", + "# Display VaR results\n", + "var_df = pd.DataFrame(var_results).T\n", + "var_df.index = [f\"{int(conf*100)}% Confidence\" for conf in confidence_levels]\n", + "var_df *= 100 # Convert to percentage\n", + "\n", + "print(\"šŸ“Š VALUE AT RISK (VaR) - Hourly Returns (%)\")\n", + "display(var_df.round(4))\n", + "\n", + "# Risk-adjusted performance metrics\n", + "downside_returns = returns_clean[returns_clean < 0]\n", + "downside_deviation = np.sqrt(np.mean(downside_returns**2)) if len(downside_returns) > 0 else 0\n", + "\n", + "# Calculate max drawdown\n", + "cumulative_returns = (1 + returns_clean).cumprod()\n", + "running_max = cumulative_returns.expanding().max()\n", + "drawdown = (cumulative_returns - running_max) / running_max\n", + "max_drawdown = drawdown.min()\n", + "\n", + "# Sortino ratio\n", + 
"sortino_ratio = (annual_return / (downside_deviation * np.sqrt(hours_per_year))) if downside_deviation != 0 else np.inf\n", + "\n", + "risk_metrics = pd.DataFrame({\n", + " 'Metric': ['Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)', \n", + " 'Annual Volatility (%)', 'Downside Deviation (%)'],\n", + " 'Value': [sharpe_ratio, sortino_ratio, max_drawdown*100, \n", + " annual_volatility*100, downside_deviation*np.sqrt(hours_per_year)*100]\n", + "})\n", + "\n", + "print(\"\\nšŸ“Š RISK-ADJUSTED PERFORMANCE METRICS\")\n", + "display(risk_metrics.round(4))\n", + "\n", + "# Current risk state\n", + "current_vol = df['volatility_20'].iloc[-1]\n", + "vol_percentile = (df['volatility_20'] < current_vol).mean() * 100\n", + "\n", + "print(f\"\\nšŸ“Š CURRENT RISK STATE\")\n", + "print(f\"Current 20H Volatility: {current_vol:.4f}\")\n", + "print(f\"Volatility Percentile: {vol_percentile:.1f}%\")\n", + "print(f\"Risk Assessment: {'High' if vol_percentile > 75 else 'Medium' if vol_percentile > 25 else 'Low'}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“Š Seasonality Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“Š SEASONALITY ANALYSIS\n", + "==================================================\n", + "ā° HOURLY PATTERNS\n", + "Best hour: 22:00 (0.000514)\n", + "Worst hour: 3:00 (-0.000184)\n", + "\n", + "šŸ“… DAY OF WEEK PATTERNS\n", + "Mon: 0.000185\n", + "Tue: 0.000064\n", + "Wed: 0.000159\n", + "Thu: 0.000014\n", + "Fri: 0.000127\n", + "Sat: 0.000138\n", + "Sun: 0.000020\n", + "\n", + "šŸ–ļø WEEKEND vs WEEKDAY\n", + "Weekday returns: 0.000110\n", + "Weekend returns: 0.000079\n", + "Weekend premium: -0.000031\n", + "\n", + "šŸ“Š STATISTICAL SIGNIFICANCE TESTS\n", + "========================================\n", + "Day of week effect (ANOVA): p-value = 0.484602\n", + "Significant: No\n", + "Weekend effect (T-test): 
p-value = 0.609417\n", + "Significant: No\n" + ] + } + ], + "source": [ + "# 9ļøāƒ£ Seasonality analysis\n", + "print(\"šŸ“Š SEASONALITY ANALYSIS\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Extract time components\n", + "df['hour'] = df.index.hour\n", + "df['day_of_week'] = df.index.dayofweek # 0=Monday, 6=Sunday\n", + "df['month'] = df.index.month\n", + "df['is_weekend'] = df['day_of_week'].isin([5, 6]) # Saturday, Sunday\n", + "\n", + "# Hourly patterns\n", + "hourly_stats = df.groupby('hour').agg({\n", + " 'returns': ['mean', 'std'],\n", + " 'volume': 'mean',\n", + " 'volatility_20': 'mean'\n", + "}).round(6)\n", + "\n", + "print(\"ā° HOURLY PATTERNS\")\n", + "best_hour = hourly_stats['returns']['mean'].idxmax()\n", + "worst_hour = hourly_stats['returns']['mean'].idxmin()\n", + "print(f\"Best hour: {best_hour}:00 ({hourly_stats['returns']['mean'][best_hour]:.6f})\")\n", + "print(f\"Worst hour: {worst_hour}:00 ({hourly_stats['returns']['mean'][worst_hour]:.6f})\")\n", + "\n", + "# Day of week patterns\n", + "dow_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\n", + "dow_stats = df.groupby('day_of_week').agg({\n", + " 'returns': ['mean', 'std'],\n", + " 'volume': 'mean'\n", + "}).round(6)\n", + "\n", + "print(\"\\nšŸ“… DAY OF WEEK PATTERNS\")\n", + "for i, day in enumerate(dow_names):\n", + " if i in dow_stats.index:\n", + " mean_ret = dow_stats.loc[i, ('returns', 'mean')]\n", + " print(f\"{day}: {mean_ret:.6f}\")\n", + "\n", + "# Weekend vs weekday analysis\n", + "weekend_stats = df.groupby('is_weekend').agg({\n", + " 'returns': ['mean', 'std'],\n", + " 'volume': 'mean'\n", + "}).round(6)\n", + "\n", + "print(\"\\nšŸ–ļø WEEKEND vs WEEKDAY\")\n", + "if False in weekend_stats.index and True in weekend_stats.index:\n", + " weekday_ret = weekend_stats.loc[False, ('returns', 'mean')]\n", + " weekend_ret = weekend_stats.loc[True, ('returns', 'mean')]\n", + " print(f\"Weekday returns: {weekday_ret:.6f}\")\n", + " print(f\"Weekend returns: 
{weekend_ret:.6f}\")\n", + " print(f\"Weekend premium: {(weekend_ret - weekday_ret):.6f}\")\n", + "\n", + "# Statistical tests\n", + "print(\"\\nšŸ“Š STATISTICAL SIGNIFICANCE TESTS\")\n", + "print(\"=\" * 40)\n", + "\n", + "# Test for day of week effect\n", + "dow_groups = [df[df['day_of_week'] == i]['returns'].dropna() for i in range(7) if i in df['day_of_week'].values]\n", + "if len(dow_groups) > 1:\n", + " f_stat_dow, p_val_dow = stats.f_oneway(*dow_groups)\n", + " print(f\"Day of week effect (ANOVA): p-value = {p_val_dow:.6f}\")\n", + " print(f\"Significant: {'Yes' if p_val_dow < 0.05 else 'No'}\")\n", + "\n", + "# Test for weekend effect\n", + "weekday_returns = df[~df['is_weekend']]['returns'].dropna()\n", + "weekend_returns = df[df['is_weekend']]['returns'].dropna()\n", + "\n", + "if len(weekday_returns) > 0 and len(weekend_returns) > 0:\n", + " t_stat, p_val = stats.ttest_ind(weekend_returns, weekday_returns)\n", + " print(f\"Weekend effect (T-test): p-value = {p_val:.6f}\")\n", + " print(f\"Significant: {'Yes' if p_val < 0.05 else 'No'}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Seasonality visualization\n", + "fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(18, 12))\n", + "\n", + "# Plot 1: Hourly patterns\n", + "hours = range(24)\n", + "hourly_returns = [hourly_stats.loc[h, ('returns', 'mean')] if h in hourly_stats.index else 0 for h in hours]\n", + "hourly_volume = [hourly_stats.loc[h, ('volume', 'mean')] if h in hourly_stats.index else 0 for h in hours]\n", + "\n", + "ax1_twin = ax1.twinx()\n", + "ax1.bar(hours, hourly_returns, alpha=0.7, color='blue', label='Avg Returns')\n", + "ax1_twin.plot(hours, hourly_volume, color='red', marker='o', label='Avg Volume')\n", + "ax1.set_title('Hourly Patterns', fontsize=14, fontweight='bold')\n", + "ax1.set_xlabel('Hour of Day')\n", + "ax1.set_ylabel('Average Returns', color='blue')\n", + "ax1_twin.set_ylabel('Average Volume', color='red')\n", + "ax1.grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: Day of week patterns\n", + "dow_returns = [dow_stats.loc[i, ('returns', 'mean')] if i in dow_stats.index else 0 for i in range(7)]\n", + "ax2.bar(dow_names, dow_returns, alpha=0.7, color='green')\n", + "ax2.set_title('Day of Week Patterns', fontsize=14, fontweight='bold')\n", + "ax2.set_ylabel('Average Returns')\n", + "ax2.grid(True, alpha=0.3)\n", + "\n", + "# Plot 3: Volatility by hour\n", + "hourly_vol = [hourly_stats.loc[h, ('volatility_20', 'mean')] if h in hourly_stats.index else 0 for h in hours]\n", + "ax3.plot(hours, hourly_vol, marker='o', linewidth=2, color='purple')\n", + "ax3.set_title('Volatility by Hour', fontsize=14, fontweight='bold')\n", + "ax3.set_xlabel('Hour of Day')\n", + "ax3.set_ylabel('Average 20H Volatility')\n", + "ax3.grid(True, alpha=0.3)\n", + "\n", + "# Plot 4: Monthly patterns\n", + "monthly_stats = df.groupby('month')['returns'].mean()\n", + "month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',\n", + " 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']\n", + "monthly_returns = 
[monthly_stats[i] if i in monthly_stats.index else 0 for i in range(1, 13)]\n", + "\n", + "ax4.bar(month_names, monthly_returns, alpha=0.7, color='orange')\n", + "ax4.set_title('Monthly Patterns', fontsize=14, fontweight='bold')\n", + "ax4.set_ylabel('Average Returns')\n", + "ax4.tick_params(axis='x', rotation=45)\n", + "ax4.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸŽ² Monte Carlo Simulation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸŽ² MONTE CARLO SIMULATION\n", + "==================================================\n", + "šŸ“Š SIMULATION PARAMETERS\n", + "Current Price: $84,351.47\n", + "Mean Log Return: 0.000070\n", + "Volatility: 0.007833\n", + "Simulations: 1,000\n", + "Forecast Period: 30 days\n", + "\n", + "šŸ“Š PRICE FORECASTS (30 days ahead)\n", + "========================================\n", + " 5th percentile: $60,878.82 (-27.83%)\n", + "25th percentile: $75,284.92 (-10.75%)\n", + "50th percentile: $86,797.64 ( +2.90%)\n", + "75th percentile: $100,770.79 (+19.47%)\n", + "95th percentile: $122,622.69 (+45.37%)\n", + "\n", + "šŸ“Š PROBABILITY ANALYSIS\n", + "Price increases: 55.2%\n", + "Price doubles: 0.1%\n", + "Price halves: 0.1%\n", + "\n", + "Expected Return: 4.89%\n", + "Return Volatility: 21.24%\n" + ] + } + ], + "source": [ + "# šŸ”Ÿ Monte Carlo simulation\n", + "print(\"šŸŽ² MONTE CARLO SIMULATION\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Parameters\n", + "n_simulations = 1000\n", + "n_days = 30\n", + "n_hours = n_days * 24\n", + "\n", + "# Use historical statistics\n", + "mu = df['log_returns'].mean()\n", + "sigma = df['log_returns'].std()\n", + "current_price = df['close'].iloc[-1]\n", + "\n", + "print(f\"šŸ“Š SIMULATION PARAMETERS\")\n", + "print(f\"Current Price: ${current_price:,.2f}\")\n", + "print(f\"Mean Log 
Return: {mu:.6f}\")\n", + "print(f\"Volatility: {sigma:.6f}\")\n", + "print(f\"Simulations: {n_simulations:,}\")\n", + "print(f\"Forecast Period: {n_days} days\")\n", + "\n", + "# Geometric Brownian Motion simulation\n", + "np.random.seed(42)\n", + "dt = 1 # 1 hour time step\n", + "drift = (mu - 0.5 * sigma**2) * dt\n", + "vol_component = sigma * np.sqrt(dt)\n", + "\n", + "# Generate price paths\n", + "random_shocks = np.random.normal(0, 1, (n_simulations, n_hours))\n", + "price_paths = np.zeros((n_simulations, n_hours + 1))\n", + "price_paths[:, 0] = current_price\n", + "\n", + "for t in range(1, n_hours + 1):\n", + " price_paths[:, t] = price_paths[:, t-1] * np.exp(\n", + " drift + vol_component * random_shocks[:, t-1]\n", + " )\n", + "\n", + "# Calculate statistics\n", + "final_prices = price_paths[:, -1]\n", + "percentiles = [5, 25, 50, 75, 95]\n", + "price_percentiles = np.percentile(final_prices, percentiles)\n", + "\n", + "print(f\"\\nšŸ“Š PRICE FORECASTS ({n_days} days ahead)\")\n", + "print(\"=\" * 40)\n", + "for i, pct in enumerate(percentiles):\n", + " price = price_percentiles[i]\n", + " change = (price / current_price - 1) * 100\n", + " print(f\"{pct:2d}th percentile: ${price:8,.2f} ({change:+6.2f}%)\")\n", + "\n", + "# Probability analysis\n", + "prob_positive = (final_prices > current_price).mean() * 100\n", + "prob_double = (final_prices > current_price * 2).mean() * 100\n", + "prob_half = (final_prices < current_price * 0.5).mean() * 100\n", + "\n", + "print(f\"\\nšŸ“Š PROBABILITY ANALYSIS\")\n", + "print(f\"Price increases: {prob_positive:.1f}%\")\n", + "print(f\"Price doubles: {prob_double:.1f}%\")\n", + "print(f\"Price halves: {prob_half:.1f}%\")\n", + "\n", + "expected_return = (final_prices.mean() / current_price - 1) * 100\n", + "return_volatility = np.log(final_prices / current_price).std() * 100\n", + "\n", + "print(f\"\\nExpected Return: {expected_return:.2f}%\")\n", + "print(f\"Return Volatility: {return_volatility:.2f}%\")" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āœ… Monte Carlo analysis completed!\n" + ] + } + ], + "source": [ + "# Monte Carlo visualization\n", + "fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(18, 12))\n", + "\n", + "# Create time axis\n", + "forecast_dates = pd.date_range(start=df.index[-1], periods=n_hours + 1, freq='H')\n", + "\n", + "# Plot 1: Sample price paths\n", + "n_paths_to_plot = 100\n", + "for i in range(n_paths_to_plot):\n", + " ax1.plot(forecast_dates, price_paths[i], alpha=0.1, color='blue', linewidth=0.5)\n", + "\n", + "# Plot percentile bands\n", + "path_percentiles = np.percentile(price_paths, [5, 25, 50, 75, 95], axis=0)\n", + "ax1.plot(forecast_dates, path_percentiles[2], color='red', linewidth=3, label='Median')\n", + "ax1.fill_between(forecast_dates, path_percentiles[0], path_percentiles[4], \n", + " alpha=0.2, color='red', label='5th-95th percentile')\n", + "ax1.axhline(y=current_price, color='black', linestyle='--', alpha=0.7, label='Current Price')\n", + "ax1.set_title(f'Monte Carlo Simulation ({n_simulations:,} paths)', fontsize=14, fontweight='bold')\n", + "ax1.set_ylabel('Price (USD)')\n", + "ax1.legend()\n", + "ax1.grid(True, alpha=0.3)\n", + "\n", + "# Plot 2: Distribution of final prices\n", + "ax2.hist(final_prices, bins=50, alpha=0.7, edgecolor='black', density=True)\n", + "ax2.axvline(current_price, color='black', linestyle='--', linewidth=2, label='Current Price')\n", + "ax2.axvline(final_prices.mean(), color='red', linestyle='-', linewidth=2, label='Mean Forecast')\n", + "ax2.set_title(f'Price Distribution in {n_days} Days', fontsize=14, fontweight='bold')\n", + "ax2.set_xlabel('Price (USD)')\n", + "ax2.set_ylabel('Density')\n", + "ax2.legend()\n", + "ax2.grid(True, alpha=0.3)\n", + "\n", + "# Plot 3: Returns distribution\n", + "returns_mc = (final_prices / current_price - 1) * 100\n", + "ax3.hist(returns_mc, bins=50, alpha=0.7, 
edgecolor='black')\n", + "ax3.axvline(0, color='black', linestyle='--', alpha=0.7, label='Break-even')\n", + "ax3.axvline(returns_mc.mean(), color='red', linestyle='-', linewidth=2, \n", + " label=f'Mean: {returns_mc.mean():.1f}%')\n", + "ax3.set_title(f'{n_days}-Day Return Distribution', fontsize=14, fontweight='bold')\n", + "ax3.set_xlabel('Return (%)')\n", + "ax3.set_ylabel('Frequency')\n", + "ax3.legend()\n", + "ax3.grid(True, alpha=0.3)\n", + "\n", + "# Plot 4: Risk cone\n", + "cone_percentiles = [10, 25, 50, 75, 90]\n", + "cone_data = np.percentile(price_paths, cone_percentiles, axis=0)\n", + "\n", + "for i, pct in enumerate(cone_percentiles):\n", + " alpha = 0.7 if pct == 50 else 0.5\n", + " linewidth = 2 if pct == 50 else 1\n", + " ax4.plot(forecast_dates, cone_data[i], alpha=alpha, linewidth=linewidth, \n", + " label=f'{pct}th percentile')\n", + "\n", + "ax4.axhline(y=current_price, color='black', linestyle='--', alpha=0.7, label='Current Price')\n", + "ax4.set_title('Price Forecast Cone', fontsize=14, fontweight='bold')\n", + "ax4.set_ylabel('Price (USD)')\n", + "ax4.legend()\n", + "ax4.grid(True, alpha=0.3)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "print(\"āœ… Monte Carlo analysis completed!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“Š Data Export and Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ“Š DATA EXPORT AND SUMMARY\n", + "==================================================\n", + "\n", + "šŸ“Š COMPREHENSIVE ANALYSIS SUMMARY\n", + "========================================\n", + "\n", + "DATASET INFO:\n", + "------------\n", + " Total Records : 83954\n", + " Date Range : 2015-09-20 14:00:00 to 2025-04-20 13:00:00\n", + " Duration (Days) : 3499\n", + " Features Created : 35\n", + "\n", + "PRICE STATISTICS:\n", + "----------------\n", + " Current Price : 
$84,351.47\n", + " Min Price : $225.57\n", + " Max Price : $108,276.43\n", + " Total Return : 36114.78%\n", + "\n", + "RISK METRICS:\n", + "------------\n", + " Annual Return : 88.33%\n", + " Annual Volatility : 73.25%\n", + " Sharpe Ratio : 1.206\n", + " Max Drawdown : -84.18%\n", + "\n", + "CURRENT SIGNALS:\n", + "---------------\n", + " Price vs SMA10 : -0.32%\n", + " Price vs SMA20 : -0.65%\n", + " Golden Cross : No\n", + " Current Volatility : 0.0016\n", + "\n", + "āœ… Enhanced dataset exported: Data/BTCUSD-enhanced-analysis.csv\n", + " šŸ“Š 83954 records, 35 columns\n", + "āœ… Daily summary exported: Data/BTCUSD-daily-summary.csv\n", + "āœ… Analysis summary exported: Data/BTCUSD-analysis-summary.txt\n", + "\n", + "šŸŽ‰ ENHANCED BITCOIN ANALYSIS COMPLETED! šŸŽ‰\n", + "šŸ“ All files exported to: /Users/alex/Dev/cryptoTraining/Session-01/Data\n", + "šŸ“Š Total features created: 35\n", + "šŸ“ˆ Analysis techniques: Technical indicators, Risk analysis, Seasonality, Monte Carlo\n" + ] + } + ], + "source": [ + "# 1ļøāƒ£1ļøāƒ£ Export data and create summary\n", + "print(\"šŸ“Š DATA EXPORT AND SUMMARY\")\n", + "print(\"=\" * 50)\n", + "\n", + "# Create comprehensive summary\n", + "summary_stats = {\n", + " 'Dataset Info': {\n", + " 'Total Records': len(df),\n", + " 'Date Range': f\"{df.index.min()} to {df.index.max()}\",\n", + " 'Duration (Days)': (df.index.max() - df.index.min()).days,\n", + " 'Features Created': len(df.columns)\n", + " },\n", + " \n", + " 'Price Statistics': {\n", + " 'Current Price': f\"${df['close'].iloc[-1]:,.2f}\",\n", + " 'Min Price': f\"${df['close'].min():,.2f}\",\n", + " 'Max Price': f\"${df['close'].max():,.2f}\",\n", + " 'Total Return': f\"{((df['close'].iloc[-1] / df['close'].iloc[0] - 1) * 100):.2f}%\"\n", + " },\n", + " \n", + " 'Risk Metrics': {\n", + " 'Annual Return': f\"{annual_return:.2%}\",\n", + " 'Annual Volatility': f\"{annual_volatility:.2%}\",\n", + " 'Sharpe Ratio': f\"{sharpe_ratio:.3f}\",\n", + " 'Max Drawdown': 
f\"{max_drawdown:.2%}\"\n", + " },\n", + " \n", + " 'Current Signals': {\n", + " 'Price vs SMA10': f\"{((df['close'].iloc[-1] / df['sma_10'].iloc[-1] - 1) * 100):+.2f}%\",\n", + " 'Price vs SMA20': f\"{((df['close'].iloc[-1] / df['sma_20'].iloc[-1] - 1) * 100):+.2f}%\",\n", + " 'Golden Cross': 'Yes' if df['golden_cross'].iloc[-1] else 'No',\n", + " 'Current Volatility': f\"{df['volatility_20'].iloc[-1]:.4f}\"\n", + " }\n", + "}\n", + "\n", + "# Print summary\n", + "print(\"\\nšŸ“Š COMPREHENSIVE ANALYSIS SUMMARY\")\n", + "print(\"=\" * 40)\n", + "\n", + "for category, metrics in summary_stats.items():\n", + " print(f\"\\n{category.upper()}:\")\n", + " print(\"-\" * len(category))\n", + " for metric, value in metrics.items():\n", + " print(f\" {metric:20}: {value}\")\n", + "\n", + "# Export enhanced dataset\n", + "export_dir = \"Data\"\n", + "if not os.path.exists(export_dir):\n", + " os.makedirs(export_dir)\n", + "\n", + "# Main dataset export\n", + "main_export_path = os.path.join(export_dir, \"BTCUSD-enhanced-analysis.csv\")\n", + "df.to_csv(main_export_path)\n", + "print(f\"\\nāœ… Enhanced dataset exported: {main_export_path}\")\n", + "print(f\" šŸ“Š {len(df)} records, {len(df.columns)} columns\")\n", + "\n", + "# Daily summary export\n", + "daily_agg = df.resample('1D').agg({\n", + " 'open': 'first',\n", + " 'high': 'max',\n", + " 'low': 'min',\n", + " 'close': 'last',\n", + " 'volume': 'sum',\n", + " 'returns': 'sum',\n", + " 'volatility_20': 'mean'\n", + "})\n", + "\n", + "daily_export_path = os.path.join(export_dir, \"BTCUSD-daily-summary.csv\")\n", + "daily_agg.to_csv(daily_export_path)\n", + "print(f\"āœ… Daily summary exported: {daily_export_path}\")\n", + "\n", + "# Export analysis summary\n", + "summary_export_path = os.path.join(export_dir, \"BTCUSD-analysis-summary.txt\")\n", + "with open(summary_export_path, 'w') as f:\n", + " f.write(\"BITCOIN (BTCUSD) ENHANCED ANALYSIS SUMMARY\\n\")\n", + " f.write(\"=\" * 45 + \"\\n\\n\")\n", + " \n", + " for 
category, metrics in summary_stats.items():\n", + " f.write(f\"{category.upper()}:\\n\")\n", + " f.write(\"-\" * len(category) + \"\\n\")\n", + " for metric, value in metrics.items():\n", + " f.write(f\" {metric:25}: {value}\\n\")\n", + " f.write(\"\\n\")\n", + " \n", + " f.write(f\"Analysis completed: {pd.Timestamp.now()}\\n\")\n", + " f.write(f\"Monte Carlo forecast: {n_days} days ahead\\n\")\n", + " f.write(f\"Expected return: {expected_return:.2f}%\\n\")\n", + " f.write(f\"Probability of gain: {prob_positive:.1f}%\\n\")\n", + "\n", + "print(f\"āœ… Analysis summary exported: {summary_export_path}\")\n", + "\n", + "print(f\"\\nšŸŽ‰ ENHANCED BITCOIN ANALYSIS COMPLETED! šŸŽ‰\")\n", + "print(f\"šŸ“ All files exported to: {os.path.abspath(export_dir)}\")\n", + "print(f\"šŸ“Š Total features created: {len(df.columns)}\")\n", + "print(f\"šŸ“ˆ Analysis techniques: Technical indicators, Risk analysis, Seasonality, Monte Carlo\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## šŸ“‹ Key Findings and Recommendations\n", + "\n", + "### šŸ” **Analysis Overview**\n", + "This enhanced analysis provides:\n", + "- **Technical Analysis**: Simple & exponential moving averages with trading signals\n", + "- **Risk Analysis**: VaR calculations, Sharpe ratio, maximum drawdown\n", + "- **Seasonality**: Hour, day-of-week, and monthly pattern analysis\n", + "- **Monte Carlo Simulation**: 30-day probabilistic price forecasting\n", + "- **Statistical Testing**: Significance tests for seasonal patterns\n", + "\n", + "### šŸ“ˆ **Key Insights**\n", + "1. **Moving Averages**: Effective for trend identification and signal generation\n", + "2. **Volatility Patterns**: Clear clustering with persistent high/low volatility periods\n", + "3. **Seasonality**: Significant time-based patterns in returns and volume\n", + "4. **Risk Profile**: High volatility asset requiring careful risk management\n", + "5. 
**Forecasting**: Monte Carlo provides probabilistic price ranges\n", + "\n", + "### ⚠️ **Risk Considerations**\n", + "- **High Volatility**: Bitcoin exhibits extreme price swings\n", + "- **Model Limitations**: Historical patterns may not persist\n", + "- **Market Risk**: Cryptocurrency markets are highly speculative\n", + "- **Regulatory Risk**: Subject to changing regulations\n", + "\n", + "### 🛠️ **Practical Applications**\n", + "1. **Trend Following**: Use moving average crossovers for signals\n", + "2. **Risk Management**: Monitor volatility for position sizing\n", + "3. **Timing**: Consider seasonal patterns for entry/exit\n", + "4. **Forecasting**: Use Monte Carlo ranges for planning\n", + "\n", + "---\n", + "\n", + "**Disclaimer**: This analysis is for educational purposes only and does not constitute financial advice.\n", + "\n", + "*Enhanced analysis using Python, pandas, numpy, matplotlib, and statistical libraries.*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}