Below is the script to generate a CSV file from a JSON output. I wrote this script for generating CSV for collecting compute optimizer data so that each EC2 has one line of data in the CSV file. Later on this CSV file is uploaded to google sheet for further analysis.
Python script “reportComputeOptData.py” is called within shell script “reportComputeOptData.sh”.
Python Script
import sys
import json
import pandas as pd
## Env is set for proper console display
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
## Env Setting - Ends
jsonfile = str(sys.argv[1])
csvfile = str(sys.argv[2])
with open(jsonfile) as file:
data = json.load(file)
df = pd.DataFrame(data['instanceRecommendations'])
for i,item in enumerate(df['utilizationMetrics']):
for k in range(len(df['utilizationMetrics'][i])):
#Add a new column with a default value and then add/update the value of that colm
df.at[i,'utilizationMetrics_name_{}'.format(k)] = dict(df['utilizationMetrics'][i][k])['name']
df.at[i,'utilizationMetrics_statistic_{}'.format(k)] = dict(df['utilizationMetrics'][i][k])['statistic']
df.at[i,'utilizationMetrics_value_{}'.format(k)] = dict(df['utilizationMetrics'][i][k])['value']
for m in range(len(df['recommendationOptions'][i])):
df.at[i,'recommendationOptions_instanceType_{}'.format(m)] = dict(df['recommendationOptions'][i][m])['instanceType']
df.at[i,'recommendationOptions_performanceRisk_{}'.format(m)] = dict(df['recommendationOptions'][i][m])['performanceRisk']
df.at[i,'recommendationOptions_rank_{}'.format(m)] = dict(df['recommendationOptions'][i][m])['rank']
for j in range(len(dict(df['recommendationOptions'][i][m])['projectedUtilizationMetrics'])):
df.at[i,'reco_projectedUtilizationMetrics_{}_name_{}'.format(m,j)] = dict(dict(df['recommendationOptions'][i][m])['projectedUtilizationMetrics'][j])['name']
df.at[i,'reco_projectedUtilizationMetrics_{}_statistic_{}'.format(m,j)] = dict(dict(df['recommendationOptions'][i][m])['projectedUtilizationMetrics'][j])['statistic']
df.at[i,'reco_projectedUtilizationMetrics_{}_value_{}'.format(m,j)] = dict(dict(df['recommendationOptions'][i][m])['projectedUtilizationMetrics'][j])['value']
df = df.drop({'utilizationMetrics','recommendationOptions'}, axis=1)
df.to_csv(csvfile, header=True,index=False)
print("CSV File generated at..- {}".format(csvfile))
Shell Script (which generates the json file which then parsed to python script to generate the CSV file)
#!/bin/sh
if [[ $# -lt 1 ]]; then
echo "Usage: ${0} <AccountID> [<Region>]"
exit 1
fi
NOW=$(date +"%m%d%Y%H%M")
AccountID=${1}
AWS_DEFAULT_REGION=${2} ## 3rd Argument is the Account Default Region is diff than the CLI server
script_top=/d01/app/aws_script/bin
outputdir=/d01/app/aws_script/output
csvfile=${outputdir}/${AccountID}_copt-${NOW}.csv
jsonfile=${outputdir}/${AccountID}_copt-${NOW}.json
#
## Reset Env variables
reset_env () {
unset AWS_SESSION_TOKEN
unset AWS_DEFAULT_REGION
unset AWS_SECRET_ACCESS_KEY
unset AWS_ACCESS_KEY_ID
} #end of reset_env
## Set Env function
assume_role () {
AccountID=${1}
source </path_to_source_env_file/filename> ${AccontID}
}
# Function assume_role ends
assume_role ${AccountID}
if [[ ! -z "$2" ]]; then
AWS_DEFAULT_REGION='us-east-2'
fi
#
## Generate json file
aws compute-optimizer get-ec2-instance-recommendations | jq -r . >${jsonfile}
## Pass the json file to python script along with the CSV File for the output
python ${script_top}/reportComputeOptData.py ${jsonfile} ${csvfile}
echo "CSV File generated... - ${csvfile}"
reset_env
Json file format
{
“instanceRecommendations”: [
{
“instanceArn”: “arn:aws:ec2:eu-east-1:123404238928:instance/i-04a67rqw6c029b82f”,
“accountId”: “123404238928”,
“instanceName”: “testserver01”,
“currentInstanceType”: “c4.xlarge”,
“finding”: “OVER_PROVISIONED”,
“utilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 6.3559322033898304
}
],
“lookBackPeriodInDays”: 14,
“recommendationOptions”: [
{
“instanceType”: “t3.large”,
“projectedUtilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 12.711864406779661
}
],
“performanceRisk”: 3,
“rank”: 1
},
{
“instanceType”: “m5.large”,
“projectedUtilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 12.711864406779661
}
],
“performanceRisk”: 1,
“rank”: 2
},
{
“instanceType”: “m4.large”,
“projectedUtilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 15.645371577574968
}
],
“performanceRisk”: 1,
“rank”: 3
}
],
“recommendationSources”: [
{
“recommendationSourceArn”: “arn:aws:ec2:eu-east-1:123404238928:instance/i-04a67rqw6c029b82f”,
“recommendationSourceType”: “Ec2Instance”
}
],
“lastRefreshTimestamp”: 1583986171.637
},
{
“instanceArn”: “arn:aws:ec2:eu-east-1:123404238928:instance/i-0af6a6b96e2690002”,
“accountId”: “123404238928”,
“instanceName”: “TestServer02”,
“currentInstanceType”: “t2.micro”,
“finding”: “OPTIMIZED”,
“utilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 96.27118644067791
}
],
“lookBackPeriodInDays”: 14,
“recommendationOptions”: [
{
“instanceType”: “t3.micro”,
“projectedUtilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 39.1101694915254
}
],
“performanceRisk”: 1,
“rank”: 1
},
{
“instanceType”: “t2.micro”,
“projectedUtilizationMetrics”: [
{
“name”: “CPU”,
“statistic”: “MAXIMUM”,
“value”: 96.27118644067791
}
],
“performanceRisk”: 1,
“rank”: 2
}
],
“recommendationSources”: [
{
“recommendationSourceArn”: “arn:aws:ec2:eu-east-1:123404238928:instance/i-0af6a6b96e2690002”,
“recommendationSourceType”: “Ec2Instance”
}
],
“lastRefreshTimestamp”: 1583986172.297
}
],
“errors”: []
}
Enjoy reading !!!
Anand M
Leave a comment