+1 (315) 557-6473 

Python Program to Create Covid Case Surveillance Assignment Solution.


Instructions

Objective
Write a program to create a covid case surveillance in python.

Requirements and Specifications

program to create covid case surveillance in python

Source Code

import pandas as pd

import numpy as np

data = pd.read_csv('COVID-19_Case_Surveillance_Public_Use_Data_with_Geography.csv')

data.head()

# Part a)

cases_total = data.groupby(['case_month']).size().reset_index().rename(columns={0:'count'})

# Delete where case_month has the following values

invalid_vals = [' "error" : true', ' "message" : "Internal error"', ' "status" : 500', '}']

cases_total = cases_total[~cases_total['case_month'].isin(invalid_vals)]

# Set a new column named diff

cases_total['diff'] = 0

# For the first row set NaN

cases_total.loc[3,'diff'] = np.nan

cases_total.reset_index(drop=True, inplace=True)

# Calculate diff

cases_total['diff']=cases_total['count'].pct_change()*100

cases_total.head()

### Now, save dataframe to cases_total.csv

cases_total.to_csv('cases_total.csv', index = False)

# Part b)

cases_by_state = data.groupby(['case_month', 'res_state']).count().reset_index().rename(columns={0:'count'})

cases_by_state = cases_by_state.rename(columns = {'state_fips_code': 'count'})

cases_by_state = cases_by_state[['case_month', 'res_state', 'count']]

cases_by_state.tail()

cases_by_state = cases_by_state.rename(columns = {'state_fips_code': 'count'})

cases_by_state = cases_by_state[['case_month', 'res_state', 'count']]

cases_by_state.tail()

state_diff = cases_by_state.groupby(['case_month']).sum().reset_index()

state_diff.head()

state_diff['diff']=state_diff['count'].pct_change()*100

state_diff.head()

for i in range(len(state_diff)):

month = state_diff.loc[i, 'case_month']

diff = state_diff.loc[i, 'diff']

cases_by_state.loc[cases_by_state['case_month'] == month, 'diff'] = diff

cases_by_state.tail()

## Population by state

Dataset with population by state

pop_state = pd.read_csv('NST-EST2021-alldata.csv')

# Take only the columns we are interested

pop_state = pop_state[['NAME', 'POPESTIMATE2021']]

# Add a column named Abbrev

pop_state['Abbrev'] = ''

pop_state.head()

## Read states and their abreviations

states_abbrev = pd.read_csv('StatesAbbrev.csv')

states_abbrev.head()

## For each state in pop_state, get their abbreviation in states_abbrev and add to pop_state

for i in range(len(pop_state)):

state = pop_state.loc[i, 'NAME']

# get abbrev

abbrev = states_abbrev[states_abbrev['State'] == state]['Code']

if len(abbrev) > 0:

pop_state.loc[pop_state['NAME'] == state, 'Abbrev'] = abbrev.values[0]

# Drop rows where abbrev is empty

pop_state = pop_state.dropna()

pop_state.head()

### Now for each state-code in cases_by_state, compute the number of cases per 100000

for i in range(len(cases_by_state)):

state_code = cases_by_state.loc[i, 'res_state']

# Get the population for this state

pop = pop_state[pop_state['Abbrev'] == state_code]['POPESTIMATE2021']

if len(pop) > 0:

pop = pop.values[0]

# Update 'count

cases_by_state.loc[i, 'count'] = cases_by_state.loc[i, 'count']*100000/pop

cases_by_state.head()

# Sort by month and count

cases_by_state = cases_by_state.sort_values(by=['case_month', 'count'])

cases_by_state.head()

# Recalculate diff

states_diff = cases_by_state.groupby(['case_month']).sum().reset_index()

states_diff.head()

state_diff['diff']=state_diff['count'].pct_change()*100

state_diff.head()

for i in range(len(state_diff)):

month = state_diff.loc[i, 'case_month']

diff = state_diff.loc[i, 'diff']

cases_by_state.loc[cases_by_state['case_month'] == month, 'diff'] = diff

cases_by_state.tail()

### Write csv

with open('cases_by_state.csv', 'w') as f:

for month in cases_by_state['case_month'].unique():

rows = cases_by_state[cases_by_state['case_month'] == month]

rows = rows.to_numpy()

for row in rows:

row = list(map(str, row))

line = ','.join(row)

f.write(line + '\n')

f.write('\n')

for row in rows.to_numpy():

row = list(map(str, row))

print(','.join(row))

# Part c) CSV File with population