# Dengue district-level plotting script.
#
# The file is organised as a sequence of independent sections.  Every section
# except the last one ("Monthly Plots") is wrapped in a triple-quoted string
# literal, i.e. it is disabled and is NOT executed when the script runs.  To
# run one of the disabled sections, remove the surrounding triple quotes.

###################### Calculate daily counts from cumulative ######################
# NOTE(review): this (disabled) section writes 'Updated_Daily_Data.csv', while
# every other section reads 'Updated_Daily_File.csv' -- confirm which file name
# is the intended one before re-enabling it.
"""
import pandas as pd

# Load the cumulative data
df = pd.read_csv('/home/athreya/Dengue/dengue_cleaned_data.csv', quotechar='"', skipinitialspace=True)

# Ensure the 'Date' column is in datetime format
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])

# Convert cumulative count columns to numeric
df['cumulative.positive.total'] = pd.to_numeric(df['cumulative.positive.total'], errors='coerce')
df['cumulative.deaths'] = pd.to_numeric(df['cumulative.deaths'], errors='coerce')

# Sort the DataFrame by 'Districts' and 'Date'
df = df.sort_values(by=['location.admin2.name', 'metadata.recordDate'])

# Initialize daily count columns
df['daily_positive_total'] = 0
df['daily_deaths'] = 0

# Calculate daily counts with reset handling
for district in df['location.admin2.name'].unique():
    district_data = df[df['location.admin2.name'] == district]
    previous_positive = 0
    previous_death = 0

    for i in range(len(district_data)):
        current_positive = district_data['cumulative.positive.total'].iloc[i]
        current_death = district_data['cumulative.deaths'].iloc[i]

        # Check for resets in cumulative counts
        if current_positive < previous_positive:
            previous_positive = 0  # Reset
        if current_death < previous_death:
            previous_death = 0  # Reset

        # Calculate daily counts
        df.loc[district_data.index[i], 'daily_positive_total'] = max(current_positive - previous_positive, 0)
        df.loc[district_data.index[i], 'daily_deaths'] = max(current_death - previous_death, 0)

        # Update previous counts
        previous_positive = current_positive
        previous_death = current_death

# Save the updated DataFrame to a new CSV file with all relevant data
df.to_csv('/home/athreya/Dengue/Updated_Daily_Data.csv', index=False)

print("Daily counts, cumulative counts, and deaths have been calculated and saved to 'Updated_Daily_Data.csv'")
"""

####################### Visualizing the daily counts district wise ######################
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import matplotlib.cm as cm
import numpy as np

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Convert 'metadata.recordDate' to datetime format
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])

# Get unique districts
districts = df['location.admin2.name'].unique()

# Create directories to save plots if they do not exist
png_output_dir = '/home/athreya/Dengue/plots'
html_output_dir = '/home/athreya/Dengue/html_plots'
os.makedirs(png_output_dir, exist_ok=True)
os.makedirs(html_output_dir, exist_ok=True)

# Set the Viridis color palette for Matplotlib
viridis_colors = cm.viridis(np.linspace(0, 1, len(districts)))

# Convert colors to hex for Plotly
viridis_colors_hex = ['#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255)) for r, g, b, _ in viridis_colors]

# Plotting daily counts for each district
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()  # Drop NaNs to ensure continuous lines

    # Resampling to select every 7th day
    district_data = district_data.set_index('metadata.recordDate').resample('7D').sum().reset_index()

    # Matplotlib Plot
    plt.figure(figsize=(10, 5))
    plt.plot(district_data['metadata.recordDate'], district_data['daily_positive_total'], label='Daily Positive Count', color=viridis_colors_hex[i], marker='o', markersize=3)
    plt.plot(district_data['metadata.recordDate'], district_data['daily_deaths'], label='Daily Death Count', color=viridis_colors_hex[i], linestyle='--')
    plt.title(f'Daily Counts for {district}')
    plt.xlabel('Date')
    plt.ylabel('Count')
    plt.xticks(rotation=45)

    # Show only every 2nd date on x-axis
    plt.xticks(district_data['metadata.recordDate'][::8], rotation=45)
    plt.legend()
    plt.tight_layout()

    # Save each Matplotlib plot as a PNG file with numerical naming
    plt.savefig(os.path.join(png_output_dir, f'{i + 1}.png'))  # Save as 1.png, 2.png, etc.
    plt.close()

    # Plotly Plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['daily_positive_total'],
        mode='lines+markers',
        name='Daily Positive Count',
        line=dict(color=viridis_colors_hex[i]),
        marker=dict(size=8)
    ))
    fig.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['daily_deaths'],
        mode='lines+markers',
        name='Daily Death Count',
        line=dict(color=viridis_colors_hex[i], dash='dash'),
        marker=dict(size=8)
    ))
    fig.update_layout(
        title=f'Daily Counts for {district}',
        xaxis_title='Date',
        yaxis_title='Count',
        template='plotly_dark',
        xaxis=dict(tickformat='%Y-%m-%d'),
        yaxis=dict(title='Count'),
        xaxis_tickvals=district_data['metadata.recordDate'][::8]  # Show only every 2nd date in Plotly
    )

    # Save each Plotly plot as an HTML file with numerical naming
    fig.write_html(os.path.join(html_output_dir, f'{i + 1}.html'))  # Save as 1.html, 2.html, etc.

print(f'Plots for daily counts have been saved in {png_output_dir} (PNG) and {html_output_dir} (HTML)')
"""

#################### Plotting all the districts on a single plot #####################
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import matplotlib.cm as cm
import numpy as np

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Get unique districts
districts = df['location.admin2.name'].unique()

# Create directories to save plots if they do not exist
png_output_dir = '/home/athreya/Dengue/plots'
html_output_dir = '/home/athreya/Dengue/html_plots'
os.makedirs(png_output_dir, exist_ok=True)
os.makedirs(html_output_dir, exist_ok=True)

# Set the Viridis color palette for Matplotlib
viridis_colors = cm.viridis(np.linspace(0, 1, len(districts)))

# Convert colors to hex for Plotly
viridis_colors_hex = ['#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255)) for r, g, b, _ in viridis_colors]

# Plot for Cumulative Counts
plt.figure(figsize=(15, 8))
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    plt.plot(district_data['metadata.recordDate'], district_data['cumulative.positive.total'], label=f'{district} - Cumulative Positive', color=viridis_colors_hex[i], marker='o', linestyle='-')

plt.title('Cumulative Positive Counts for All Districts')
plt.xlabel('Date')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(png_output_dir, 'Cumulative_Positive_Counts.png'))
plt.close()

# Plot for Daily Counts
plt.figure(figsize=(15, 8))
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    plt.plot(district_data['metadata.recordDate'], district_data['daily_positive_total'], label=f'{district} - Daily Positive Count', color=viridis_colors_hex[i], marker='o', linestyle='-')
    plt.plot(district_data['metadata.recordDate'], district_data['daily_deaths'], label=f'{district} - Daily Death Count', color=viridis_colors_hex[i], marker='o', linestyle='--')

plt.title('Daily Counts for All Districts')
plt.xlabel('Date')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(png_output_dir, 'Daily_Counts.png'))
plt.close()

# Plot for Daily Death Counts
plt.figure(figsize=(15, 8))
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    plt.plot(district_data['metadata.recordDate'], district_data['daily_deaths'], label=f'{district} - Daily Death Count', color=viridis_colors_hex[i], marker='o', linestyle='-')

plt.title('Daily Death Counts for All Districts')
plt.xlabel('Date')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(png_output_dir, 'Daily_Death_Counts.png'))
plt.close()

# Plotly for Cumulative Counts
fig_cumulative = go.Figure()
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    fig_cumulative.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['cumulative.positive.total'],
        mode='lines+markers',
        name=f'{district} - Cumulative Positive Count',
        line=dict(color=viridis_colors_hex[i]),
        marker=dict(symbol='circle', size=8)
    ))

fig_cumulative.update_layout(
    title='Cumulative Positive Counts for All Districts',
    xaxis_title='Date',
    yaxis_title='Count',
    template='plotly_dark',
    xaxis=dict(tickformat='%Y-%m-%d')
)
fig_cumulative.write_html(os.path.join(html_output_dir, 'Cumulative_Positive_Counts.html'))

# Plotly for Daily Counts
fig_daily = go.Figure()
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    fig_daily.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['daily_positive_total'],
        mode='lines+markers',
        name=f'{district} - Daily Positive Count',
        line=dict(color=viridis_colors_hex[i]),
        marker=dict(symbol='circle', size=8)
    ))
    fig_daily.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['daily_deaths'],
        mode='lines+markers',
        name=f'{district} - Daily Death Count',
        line=dict(color=viridis_colors_hex[i], dash='dash'),
        marker=dict(symbol='circle', size=8)
    ))

fig_daily.update_layout(
    title='Daily Counts for All Districts',
    xaxis_title='Date',
    yaxis_title='Count',
    template='plotly_dark',
    xaxis=dict(tickformat='%Y-%m-%d')
)
fig_daily.write_html(os.path.join(html_output_dir, 'Daily_Counts.html'))

# Plotly for Daily Death Counts
fig_deaths = go.Figure()
for i, district in enumerate(districts):
    district_data = df[df['location.admin2.name'] == district].dropna()
    fig_deaths.add_trace(go.Scatter(
        x=district_data['metadata.recordDate'],
        y=district_data['daily_deaths'],
        mode='lines+markers',
        name=f'{district} - Daily Death Count',
        line=dict(color=viridis_colors_hex[i]),
        marker=dict(symbol='circle', size=8)
    ))

fig_deaths.update_layout(
    title='Daily Death Counts for All Districts',
    xaxis_title='Date',
    yaxis_title='Count',
    template='plotly_dark',
    xaxis=dict(tickformat='%Y-%m-%d')
)
fig_deaths.write_html(os.path.join(html_output_dir, 'Daily_Death_Counts.html'))

print(f'Separate plots for cumulative and daily counts have been saved in {png_output_dir} (PNG) and {html_output_dir} (HTML)')
"""

###################### Plotting weekly, bi-weekly plots ######################
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import matplotlib.cm as cm
import numpy as np

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Convert the date column to datetime format
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])

# Filter data to include only from 2021 onwards
df = df[df['metadata.recordDate'] >= '2021-01-01']

# Create a directory for output plots
output_dir = '/home/athreya/Dengue/aggregated_plots'
os.makedirs(output_dir, exist_ok=True)

# Set the Viridis color palette for Matplotlib
viridis_colors = cm.viridis(np.linspace(0, 1, len(df['location.admin2.name'].unique())))

# Convert colors to hex for Plotly
viridis_colors_hex = ['#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255)) for r, g, b, _ in viridis_colors]

# Function to plot weekly or bi-weekly trends for each district
def plot_trends(aggregation):
    districts = df['location.admin2.name'].unique()

    for i, district in enumerate(districts):
        # Filter data for the district
        district_data = df[df['location.admin2.name'] == district].copy()
        district_data.set_index('metadata.recordDate', inplace=True)

        # Resample the data based on aggregation
        if aggregation == 'weekly':
            resampled_data = district_data.resample('W').sum()
        elif aggregation == 'bi-weekly':
            resampled_data = district_data.resample('2W').sum()
        else:
            raise ValueError("Aggregation must be 'weekly' or 'bi-weekly'")

        # Calculate averages for plotting
        resampled_data['avg_positive'] = resampled_data['daily_positive_total'].rolling(window=1).mean()
        resampled_data['avg_deaths'] = resampled_data['daily_deaths'].rolling(window=1).mean()

        # Drop NaNs
        resampled_data.dropna(inplace=True)

        # Matplotlib Plot
        plt.figure(figsize=(10, 5))
        plt.gcf().patch.set_edgecolor('white')
        plt.tight_layout()
        plt.plot(resampled_data.index, resampled_data['avg_positive'], label='Weekly Average Positive Count', color=viridis_colors_hex[i], marker='o', linestyle='-')
        plt.plot(resampled_data.index, resampled_data['avg_deaths'], label='Weekly Average Death Count', color=viridis_colors_hex[i], linestyle='--')  # No marker for death
        plt.title(f'Daily Counts (Averaged) for {district} ({aggregation.capitalize()})')
        plt.xlabel('Date')
        plt.ylabel('Count')
        plt.xticks(rotation=45)
        plt.legend()
        plt.savefig(os.path.join(output_dir, f'{i + 1}_{aggregation}.png'))
        plt.close()

        # Plotly Plot
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=resampled_data.index,
            y=resampled_data['avg_positive'],
            mode='lines+markers',
            name='Weekly Average Positive Count',
            line=dict(color=viridis_colors_hex[i]),
            marker=dict(symbol='circle', size=8)
        ))
        fig.add_trace(go.Scatter(
            x=resampled_data.index,
            y=resampled_data['avg_deaths'],
            mode='lines',  # No markers for death
            name='Weekly Average Death Count',
            line=dict(color=viridis_colors_hex[i], dash='dash')
        ))
        fig.update_layout(
            title=f'Daily Counts (Averaged) for {district} ({aggregation.capitalize()})',
            xaxis_title='Date',
            yaxis_title='Count',
            template='plotly_dark',
            xaxis=dict(tickformat='%Y-%m-%d')
        )
        fig.write_html(os.path.join(output_dir, f'{i + 1}_{aggregation}.html'))

# Generate plots for weekly and bi-weekly averages
plot_trends('weekly')
plot_trends('bi-weekly')

print(f'Aggregated plots have been saved in {output_dir} (PNG and HTML)')
"""

####################### Combined Yearly plots #####################
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors  # Add this line

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Convert the date column to datetime format
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])
df = df[df['metadata.recordDate'] >= '2021-01-01']

# Create a directory for output plots
output_dir = '/home/athreya/Dengue/combined_yearly_plots'
os.makedirs(output_dir, exist_ok=True)

# Function to plot combined yearly trends
def plot_combined_yearly_trends():
    # Set the index to the date column
    df.set_index('metadata.recordDate', inplace=True)

    # Resample the data yearly and sum across all districts
    yearly_data = df.resample('Y').sum()

    # Drop NaNs
    yearly_data.dropna(inplace=True)

    # Define colors using the Viridis palette
    viridis_colors = cm.viridis(np.linspace(0, 1, 2))

    # Convert Viridis colors to hex
    viridis_colors_hex = [matplotlib.colors.to_hex(color) for color in viridis_colors]

    # Matplotlib Plot
    plt.figure(figsize=(10, 5))
    plt.plot(yearly_data.index, yearly_data['daily_positive_total'], label='Yearly Total Positive Count', color=viridis_colors_hex[0], marker='o', linestyle='-')
    plt.plot(yearly_data.index, yearly_data['daily_deaths'], label='Yearly Total Death Count', color=viridis_colors_hex[1], linestyle='--')  # No marker for death
    plt.title('Combined Yearly Counts for All Districts')
    plt.xlabel('Year')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'combined_yearly_counts.png'))
    plt.close()

    # Plotly Plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=yearly_data.index,
        y=yearly_data['daily_positive_total'],
        mode='lines+markers',
        name='Yearly Total Positive Count',
        line=dict(color=viridis_colors_hex[0]),
        marker=dict(symbol='circle', size=8)
    ))
    fig.add_trace(go.Scatter(
        x=yearly_data.index,
        y=yearly_data['daily_deaths'],
        mode='lines',  # No markers for death
        name='Yearly Total Death Count',
        line=dict(color=viridis_colors_hex[1], dash='dash')
    ))
    fig.update_layout(
        title='Combined Yearly Counts for All Districts',
        xaxis_title='Year',
        yaxis_title='Count',
        template='plotly_dark',
        xaxis=dict(tickformat='%Y')
    )
    fig.write_html(os.path.join(output_dir, 'combined_yearly_counts.html'))

# Generate combined yearly average plots
plot_combined_yearly_trends()

print(f'Combined yearly aggregated plots have been saved in {output_dir} (PNG and HTML)')
"""

############################### Year-wise plots #############################
"""
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import os
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Convert the date column to datetime format
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])
df = df[df['metadata.recordDate'] >= '2021-01-01']

# Create a directory for output plots
output_dir = '/home/athreya/Dengue/yearly_combined_plots'
os.makedirs(output_dir, exist_ok=True)

# Function to plot year-wise combined trends
def plot_yearly_combined_trends(year):
    # Filter data for the specified year
    yearly_data = df[df['metadata.recordDate'].dt.year == year]

    # Set the index to the date column
    yearly_data.set_index('metadata.recordDate', inplace=True)

    # Group by district (location.admin2.name) and resample daily counts
    grouped_data = yearly_data.groupby('location.admin2.name').resample('D').sum()

    # Drop NaNs
    grouped_data.dropna(inplace=True)

    # Define colors using the Viridis palette
    viridis_colors = cm.viridis(np.linspace(0, 1, len(grouped_data.index.levels[0])))

    # Convert Viridis colors to hex
    viridis_colors_hex = [mcolors.to_hex(color) for color in viridis_colors]

    # Create the Matplotlib Plot
    plt.figure(figsize=(15, 8))
    for idx, district in enumerate(grouped_data.index.levels[0]):
        plt.plot(grouped_data.loc[district].index, grouped_data.loc[district]['daily_positive_total'], label=district, color=viridis_colors_hex[idx], marker='o', linestyle='-')

    plt.title(f'Daily Counts for All Districts in {year}')
    plt.xlabel('Date')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'yearly_combined_counts_{year}.png'))
    plt.close()

    # Create the Plotly Plot
    fig = go.Figure()
    for idx, district in enumerate(grouped_data.index.levels[0]):
        fig.add_trace(go.Scatter(
            x=grouped_data.loc[district].index,
            y=grouped_data.loc[district]['daily_positive_total'],
            mode='lines+markers',
            name=district,
            line=dict(color=viridis_colors_hex[idx]),
            marker=dict(symbol='circle', size=8)
        ))

    fig.update_layout(
        title=f'Daily Counts for All Districts in {year}',
        xaxis_title='Date',
        yaxis_title='Count',
        template='plotly_dark',
        xaxis=dict(tickformat='%Y-%m-%d')
    )
    fig.write_html(os.path.join(output_dir, f'yearly_combined_counts_{year}.html'))

# Generate combined yearly plots for specific years
for year in df['metadata.recordDate'].dt.year.unique():
    plot_yearly_combined_trends(year)

print(f'Yearly combined aggregated plots have been saved in {output_dir} (PNG and HTML)')
"""

############################################## Monthly Plots #############################################
# This is the only section that actually runs: it reads the daily-count CSV and
# writes one PNG and one HTML monthly plot per district into `output_dir`.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objects as go
import os
import numpy as np
import matplotlib.cm as cm

# Load the updated data with daily counts
df = pd.read_csv('/home/athreya/Dengue/Updated_Daily_File.csv')

# Convert the date column to datetime format and keep only 2021 onwards
df['metadata.recordDate'] = pd.to_datetime(df['metadata.recordDate'])
df = df[df['metadata.recordDate'] >= '2021-01-01']

# Create a directory for output plots
output_dir = '/home/athreya/Dengue/monthly_plots'
os.makedirs(output_dir, exist_ok=True)

# Set the Viridis color palette for Matplotlib (one color per district)
viridis_colors = cm.viridis(np.linspace(0, 1, len(df['location.admin2.name'].unique())))

# Convert colors to hex for Plotly
viridis_colors_hex = [
    '#{:02x}{:02x}{:02x}'.format(int(r * 255), int(g * 255), int(b * 255))
    for r, g, b, _ in viridis_colors
]


def plot_monthly_trends():
    """Write a monthly PNG (Matplotlib) and HTML (Plotly) plot for every district.

    Reads the module-level ``df``, ``output_dir`` and ``viridis_colors_hex``.
    For the i-th distinct value of ``location.admin2.name`` (in order of first
    appearance) the daily counts are summed per calendar month and saved as
    ``{i + 1}_monthly.png`` and ``{i + 1}_monthly.html`` inside ``output_dir``.
    Returns nothing.
    """
    count_columns = ['daily_positive_total', 'daily_deaths']
    districts = df['location.admin2.name'].unique()

    for i, district in enumerate(districts):
        # Filter data for the district
        district_data = df[df['location.admin2.name'] == district].copy()
        district_data.set_index('metadata.recordDate', inplace=True)

        # Resample on a monthly basis.  Only the two count columns are summed:
        # summing the whole frame would also "sum" the string columns (district
        # name etc.), which newer pandas versions no longer drop silently.
        # NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of
        # 'ME'; kept as 'M' here so older pandas installations keep working.
        resampled_data = district_data[count_columns].resample('M').sum()

        # NOTE(review): a rolling window of 1 leaves the values unchanged, so
        # the plotted "average" series are really the monthly totals -- confirm
        # whether a wider window (or different labels) was intended.
        resampled_data['avg_positive'] = resampled_data['daily_positive_total'].rolling(window=1).mean()
        resampled_data['avg_deaths'] = resampled_data['daily_deaths'].rolling(window=1).mean()

        # Drop NaNs
        resampled_data.dropna(inplace=True)

        # Matplotlib Plot
        plt.figure(figsize=(10, 5))
        plt.gcf().patch.set_edgecolor('white')

        # Plotting data
        plt.plot(resampled_data.index, resampled_data['avg_positive'],
                 label='Monthly Average Positive Count',
                 color=viridis_colors_hex[i], marker='o', linestyle='-')
        plt.plot(resampled_data.index, resampled_data['avg_deaths'],
                 label='Monthly Average Death Count',
                 color=viridis_colors_hex[i], linestyle='--')  # No marker for death

        # Formatting the x-axis to show every third month
        plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=3))  # Every 3rd month

        plt.title(f'Monthly Counts (Averaged) for {district}')
        plt.xlabel('Date')
        plt.ylabel('Count')
        plt.xticks(rotation=45)
        plt.legend()

        # tight_layout only accounts for what is already on the figure, so it
        # has to run after the rotated ticks, labels and legend exist.
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f'{i + 1}_monthly.png'))
        plt.close()

        # Plotly Plot
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=resampled_data.index,
            y=resampled_data['avg_positive'],
            mode='lines+markers',
            name='Monthly Average Positive Count',
            line=dict(color=viridis_colors_hex[i]),
            marker=dict(symbol='circle', size=8)
        ))
        fig.add_trace(go.Scatter(
            x=resampled_data.index,
            y=resampled_data['avg_deaths'],
            mode='lines',  # No markers for death
            name='Monthly Average Death Count',
            line=dict(color=viridis_colors_hex[i], dash='dash')
        ))

        # Set Plotly x-axis to show every third month
        fig.update_layout(
            title=f'Monthly Counts (Averaged) for {district}',
            xaxis_title='Date',
            yaxis_title='Count',
            template='plotly_dark',
            xaxis=dict(tickformat='%Y-%m', tickvals=resampled_data.index[::3])  # Every 3rd month
        )
        fig.write_html(os.path.join(output_dir, f'{i + 1}_monthly.html'))


# Generate plots for monthly averages
plot_monthly_trends()

print(f'Aggregated monthly plots have been saved in {output_dir} (PNG and HTML)')