Order Now  +1 678 648 4277 

API Query and Population Homework Solution in Python


Question 1:

Create a Python application to query the API and populate the data.

Extract these variables for all datasets:

ds_short_name

Platform

Authors

start_date

Stop_date

Format

Coll_name

Proj_name

Dataset_name

Project_home_page [url]

Format

Granule_name

data_access

Checksum

File_size

Find the overall dataset count

Find the files count for each dataset

Find the total no. of files available

Show the datasets between a date range

Select a variable and show the associated dataset

For example: choose any format and show which datasets use that format

Show the download links for a selected dataset

Users should be able to select the number of links to display

Show the statistics for all datasets

Export all the variables in a JSON file

Solution:


"""Query the GHRC inventory API and explore the returned records from a text menu."""

import json
from collections import Counter
from datetime import datetime

# Endpoint that returns every row of the ghrc_inv table.
link = "https://ghrc.nsstc.nasa.gov/hydro/es_proxy.php?esurl=_sql?sql=SELECT%20*%20from%20ghrc_inv"

# Fields copied out of every record ('format' used to be listed twice).
variables = ['ds_short_name', 'platform', 'authors', 'start_date', 'stop_date',
             'format', 'coll_name', 'proj_name', 'dataset_name',
             'project_home_page', 'granule_name', 'data_access', 'checksum',
             'file_size']

DATE_FIELDS = ('start_date', 'stop_date')
API_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
USER_DATE_FORMAT = "%Y-%m-%d"
UNNAMED_DATASET = "(unnamed)"


def displayMenu():
    """Print the numbered list of actions the user can choose from."""
    print("1) Find the overall dataset count")
    print("2) Find the files count for each dataset")
    print("3) Find the total no. of files available")
    print("4) Show the datasets between a date range")
    print("5) Select a variable and show the associated dataset")
    print("6) Show the download links for a selected dataset")
    print("7) Show statistics for all datasets")
    print("8) Export to JSON")
    print("9) Exit")


def getOption(message, lb, ub):
    """Prompt with *message* until an integer in [lb, ub] is entered; return it."""
    while True:
        try:
            option = int(input(message))
        except ValueError:
            print("Enter a valid option.")
            continue
        if lb <= option <= ub:
            return option
        print(f"Enter a number between {lb} and {ub}")


def fetch_records(url=link, timeout=30):
    """Download the inventory and return the list of raw '_source' dicts.

    Raises:
        RuntimeError: if the request fails, the body is not JSON, or the
            payload does not contain ``hits.hits``.
    """
    # Imported here so the pure helpers below stay importable (and testable)
    # on machines where the third-party 'requests' package is not installed.
    import requests

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as err:
        raise RuntimeError(f"Could not retrieve data from the API: {err}") from err

    try:
        hits = payload['hits']['hits']
    except (KeyError, TypeError) as err:
        raise RuntimeError("Unexpected API response: 'hits.hits' is missing") from err
    return [hit['_source'] for hit in hits if '_source' in hit]


def _parse_api_date(value):
    """Return *value* parsed with API_DATE_FORMAT, or None when it does not parse."""
    try:
        return datetime.strptime(value, API_DATE_FORMAT)
    except (TypeError, ValueError):
        return None


def parse_record(source):
    """Split one raw record into (parsed, clean) dicts limited to `variables`.

    ``clean`` keeps the values exactly as received (JSON-serialisable);
    ``parsed`` is the same except that date fields become ``datetime``
    objects (``None`` when the text cannot be parsed).
    """
    clean = {var: source[var] for var in variables if var in source}
    parsed = dict(clean)
    for field in DATE_FIELDS:
        if field in parsed:
            parsed[field] = _parse_api_date(parsed[field])
    return parsed, clean


def load_datasets(sources):
    """Return ``(datasets, datasets_clean)`` built from raw records."""
    datasets, datasets_clean = [], []
    for source in sources:
        parsed, clean = parse_record(source)
        datasets.append(parsed)
        datasets_clean.append(clean)
    return datasets, datasets_clean


def _name_of(record):
    """Return the record's dataset name, or a placeholder when it has none."""
    return record.get('dataset_name', UNNAMED_DATASET)


def _unique(names):
    """Drop repeated names while keeping first-seen order."""
    return list(dict.fromkeys(names))


def files_per_dataset(datasets):
    """Count records per dataset name.

    NOTE(review): assumes each inventory record describes one granule (file),
    as suggested by the per-record granule_name/checksum/file_size fields --
    confirm against the API.
    """
    return Counter(_name_of(record) for record in datasets)


def datasets_between(datasets, start, stop):
    """Return names of datasets whose [start_date, stop_date] lies within [start, stop]."""
    matches = []
    for record in datasets:
        begins, ends = record.get('start_date'), record.get('stop_date')
        # Records with a missing or unparseable date cannot be placed in a range.
        if begins is None or ends is None:
            continue
        if begins >= start and ends <= stop:
            matches.append(_name_of(record))
    return _unique(matches)


def datasets_matching(datasets_clean, var, value):
    """Return names of datasets where field *var* equals the text *value*.

    Values are compared as strings so that numeric and date fields can be
    matched against what the user typed.
    """
    return _unique(
        _name_of(record)
        for record in datasets_clean
        if var in record and str(record[var]) == value
    )


def download_links(datasets, name):
    """Return every ``data_access`` entry of the records belonging to *name*."""
    links = []
    for record in datasets:
        if record.get('dataset_name') != name or 'data_access' not in record:
            continue
        access = record['data_access']
        if isinstance(access, (list, tuple)):
            links.extend(access)
        else:
            links.append(access)
    return links


def average_file_size(datasets):
    """Return the mean of the numeric ``file_size`` values, or None if there are none."""
    sizes = []
    for record in datasets:
        try:
            sizes.append(float(record['file_size']))
        except (KeyError, TypeError, ValueError):
            continue  # missing or non-numeric size: leave it out of the mean
    if not sizes:
        return None
    return sum(sizes) / len(sizes)


def export_json(datasets_clean, path='output.json'):
    """Write all records to *path* as one JSON array.

    Dumping the whole list at once keeps the file valid JSON; dumping each
    record separately would concatenate objects into an unparseable file.
    """
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(datasets_clean, f, ensure_ascii=False, indent=4)


def main():
    """Fetch the inventory once, then serve menu choices until the user exits."""
    try:
        sources = fetch_records()
    except RuntimeError as err:
        print(err)
        return
    datasets, datasets_clean = load_datasets(sources)

    while True:
        displayMenu()
        option = getOption("Enter an option:", 1, 9)

        if option == 1:
            print(f"There are {len(files_per_dataset(datasets))} datasets")
        elif option == 2:
            print("The file count for each dataset is: ")
            for name, count in files_per_dataset(datasets).items():
                print("{}: {}".format(name, count))
        elif option == 3:
            print(f"There are {len(datasets)} files between all datasets.")
        elif option == 4:
            start_text = input("Enter start date in the format year-month-day (i.e: 2015-10-21): ")
            end_text = input("Enter end date in the format year-month-day (i.e: 2015-10-21): ")
            try:
                start_date = datetime.strptime(start_text.strip(), USER_DATE_FORMAT)
                stop_date = datetime.strptime(end_text.strip(), USER_DATE_FORMAT)
            except ValueError:
                print("Dates must be entered in the format year-month-day.")
            else:
                for name in datasets_between(datasets, start_date, stop_date):
                    print("Dataset Name:", name)
        elif option == 5:
            var = input("Enter variable: ")
            value = input("Enter value: ")
            for name in datasets_matching(datasets_clean, var, value):
                print("Dataset Name:", name)
        elif option == 6:
            name = input("Enter the name of the dataset: ")
            links = download_links(datasets, name)
            if not links:
                print("No download links found for that dataset.")
            else:
                limit = getOption(
                    f"How many links to display (1-{len(links)}): ", 1, len(links))
                print("The download links are: ")
                for url in links[:limit]:
                    print(url)
        elif option == 7:
            print(f"There are {len(files_per_dataset(datasets))} datasets")
            print(f"There are {len(datasets)} files")
            mean_size = average_file_size(datasets)
            if mean_size is None:
                print("No file size information is available.")
            else:
                print("The average file size is: {:.2f}".format(mean_size))
        elif option == 8:
            export_json(datasets_clean)
            print("Data saved to file output.json")
        elif option == 9:
            print("Good Bye!")
            break
        print()


if __name__ == "__main__":
    main()