Archive for the ‘AWS/Boto3/Python’ Category

Script to generate a CSV for Compute Optimizer data from a JSON file

Below is a script that generates a CSV file from JSON output. I wrote it to collect AWS Compute Optimizer data so that each EC2 instance has one line of data in the CSV file. The CSV file is later uploaded to a Google Sheet for further analysis.

The Python script “reportComputeOptData.py” is called from within the shell script “reportComputeOptData.sh”.
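For example, the flow looks like this (the account ID is taken from the sample JSON further below, the region is just an example, and the output paths come from the variables set in the shell script):

./reportComputeOptData.sh 123404238928 us-east-2
## writes /d01/app/aws_script/output/123404238928_copt-<MMDDYYYYHHMM>.json
## and    /d01/app/aws_script/output/123404238928_copt-<MMDDYYYYHHMM>.csv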

Python Script

import sys
import json
import pandas as pd
## Env is set for proper console display
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
## Env Setting - Ends
jsonfile = str(sys.argv[1])
csvfile = str(sys.argv[2])
with open(jsonfile) as file:
    data = json.load(file)

df = pd.DataFrame(data['instanceRecommendations'])
for i, metrics in enumerate(df['utilizationMetrics']):
    # Flatten each utilization metric into its own set of columns
    for k, metric in enumerate(metrics):
        df.at[i, 'utilizationMetrics_name_{}'.format(k)] = metric['name']
        df.at[i, 'utilizationMetrics_statistic_{}'.format(k)] = metric['statistic']
        df.at[i, 'utilizationMetrics_value_{}'.format(k)] = metric['value']

    # Flatten each recommendation option and its projected utilization metrics
    for m, option in enumerate(df['recommendationOptions'][i]):
        df.at[i, 'recommendationOptions_instanceType_{}'.format(m)] = option['instanceType']
        df.at[i, 'recommendationOptions_performanceRisk_{}'.format(m)] = option['performanceRisk']
        df.at[i, 'recommendationOptions_rank_{}'.format(m)] = option['rank']
        for j, projected in enumerate(option['projectedUtilizationMetrics']):
            df.at[i, 'reco_projectedUtilizationMetrics_{}_name_{}'.format(m, j)] = projected['name']
            df.at[i, 'reco_projectedUtilizationMetrics_{}_statistic_{}'.format(m, j)] = projected['statistic']
            df.at[i, 'reco_projectedUtilizationMetrics_{}_value_{}'.format(m, j)] = projected['value']

# Drop the original nested columns now that their contents are flattened
df = df.drop(['utilizationMetrics', 'recommendationOptions'], axis=1)
df.to_csv(csvfile, header=True,index=False)
print("CSV File generated at..-  {}".format(csvfile))

Shell script (generates the JSON file, which is then passed to the Python script to produce the CSV file)

#!/bin/bash
if [[ $# -lt 1 ]]; then
  echo "Usage: ${0} <AccountID> [<Region>]"
  exit 1
fi
NOW=$(date +"%m%d%Y%H%M")
AccountID=${1}
AWS_DEFAULT_REGION=${2} ## Optional 2nd argument: set when the account's default region differs from the CLI server's
script_top=/d01/app/aws_script/bin
outputdir=/d01/app/aws_script/output
csvfile=${outputdir}/${AccountID}_copt-${NOW}.csv
jsonfile=${outputdir}/${AccountID}_copt-${NOW}.json
#
## Reset Env variables
reset_env () {
        unset AWS_SESSION_TOKEN
        unset AWS_DEFAULT_REGION
        unset AWS_SECRET_ACCESS_KEY
        unset AWS_ACCESS_KEY_ID
} #end of reset_env
## Set Env function
assume_role () {
AccountID=${1}
source </path_to_source_env_file/filename> ${AccountID}
}
# Function assume_role ends
assume_role ${AccountID}
if [[ ! -z "$2" ]]; then
        export AWS_DEFAULT_REGION=${2}
fi
#
## Generate json file
aws compute-optimizer get-ec2-instance-recommendations | jq -r . >${jsonfile}
## Pass the json file to python script along with the CSV File for the output
python ${script_top}/reportComputeOptData.py ${jsonfile} ${csvfile}
echo "CSV File generated... - ${csvfile}"
reset_env
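The sourced environment file (</path_to_source_env_file/filename>) is not part of this post; a minimal sketch of what it typically does, assuming a cross-account role exists in the target account (the role name below is a placeholder), is:

#!/bin/bash
## Sketch only: assume a role in the target account and export the temporary
## credentials that reset_env later unsets.
AccountID=${1}
creds=$(aws sts assume-role \
          --role-arn "arn:aws:iam::${AccountID}:role/<Role_Name>" \
          --role-session-name "compute-optimizer-report")
export AWS_ACCESS_KEY_ID=$(echo "${creds}" | jq -r '.Credentials.AccessKeyId')
export AWS_SECRET_ACCESS_KEY=$(echo "${creds}" | jq -r '.Credentials.SecretAccessKey')
export AWS_SESSION_TOKEN=$(echo "${creds}" | jq -r '.Credentials.SessionToken')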

JSON file format

{
  "instanceRecommendations": [
    {
      "instanceArn": "arn:aws:ec2:eu-east-1:123404238928:instance/i-04a67rqw6c029b82f",
      "accountId": "123404238928",
      "instanceName": "testserver01",
      "currentInstanceType": "c4.xlarge",
      "finding": "OVER_PROVISIONED",
      "utilizationMetrics": [
        {
          "name": "CPU",
          "statistic": "MAXIMUM",
          "value": 6.3559322033898304
        }
      ],
      "lookBackPeriodInDays": 14,
      "recommendationOptions": [
        {
          "instanceType": "t3.large",
          "projectedUtilizationMetrics": [
            {
              "name": "CPU",
              "statistic": "MAXIMUM",
              "value": 12.711864406779661
            }
          ],
          "performanceRisk": 3,
          "rank": 1
        },
        {
          "instanceType": "m5.large",
          "projectedUtilizationMetrics": [
            {
              "name": "CPU",
              "statistic": "MAXIMUM",
              "value": 12.711864406779661
            }
          ],
          "performanceRisk": 1,
          "rank": 2
        },
        {
          "instanceType": "m4.large",
          "projectedUtilizationMetrics": [
            {
              "name": "CPU",
              "statistic": "MAXIMUM",
              "value": 15.645371577574968
            }
          ],
          "performanceRisk": 1,
          "rank": 3
        }
      ],
      "recommendationSources": [
        {
          "recommendationSourceArn": "arn:aws:ec2:eu-east-1:123404238928:instance/i-04a67rqw6c029b82f",
          "recommendationSourceType": "Ec2Instance"
        }
      ],
      "lastRefreshTimestamp": 1583986171.637
    },
    {
      "instanceArn": "arn:aws:ec2:eu-east-1:123404238928:instance/i-0af6a6b96e2690002",
      "accountId": "123404238928",
      "instanceName": "TestServer02",
      "currentInstanceType": "t2.micro",
      "finding": "OPTIMIZED",
      "utilizationMetrics": [
        {
          "name": "CPU",
          "statistic": "MAXIMUM",
          "value": 96.27118644067791
        }
      ],
      "lookBackPeriodInDays": 14,
      "recommendationOptions": [
        {
          "instanceType": "t3.micro",
          "projectedUtilizationMetrics": [
            {
              "name": "CPU",
              "statistic": "MAXIMUM",
              "value": 39.1101694915254
            }
          ],
          "performanceRisk": 1,
          "rank": 1
        },
        {
          "instanceType": "t2.micro",
          "projectedUtilizationMetrics": [
            {
              "name": "CPU",
              "statistic": "MAXIMUM",
              "value": 96.27118644067791
            }
          ],
          "performanceRisk": 1,
          "rank": 2
        }
      ],
      "recommendationSources": [
        {
          "recommendationSourceArn": "arn:aws:ec2:eu-east-1:123404238928:instance/i-0af6a6b96e2690002",
          "recommendationSourceType": "Ec2Instance"
        }
      ],
      "lastRefreshTimestamp": 1583986172.297
    }
  ],
  "errors": []
}

CSV file Output
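For the sample JSON above, the generated CSV starts roughly like this, one row per instance (columns after the first few are truncated with … for readability):

instanceArn,accountId,instanceName,currentInstanceType,finding,lookBackPeriodInDays,…,utilizationMetrics_name_0,utilizationMetrics_statistic_0,utilizationMetrics_value_0,recommendationOptions_instanceType_0,…
arn:aws:ec2:eu-east-1:123404238928:instance/i-04a67rqw6c029b82f,123404238928,testserver01,c4.xlarge,OVER_PROVISIONED,14,…,CPU,MAXIMUM,6.3559322033898304,t3.large,…
arn:aws:ec2:eu-east-1:123404238928:instance/i-0af6a6b96e2690002,123404238928,TestServer02,t2.micro,OPTIMIZED,14,…,CPU,MAXIMUM,96.27118644067791,t3.micro,…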

Enjoy reading !!!
Anand M


Collect Cloudwatch metrics (including custom one) and upload to S3 bucket

Recently I wrote a script to pull CloudWatch metrics (including the custom one, memory utilization) using the CLI. The objective is to publish the data to S3 and then, using Athena/QuickSight, build a dashboard that gives a consolidated view of CPU and memory utilization for all servers across all AWS accounts.

This dashboard helps make the right decision on resizing instances, thereby optimizing the overall cost.
The script is scheduled (using crontab) to run every hour. There are 2 parts to the script:
1. collect_cw_metrics.py – the main script
2. collect_cw_metrics.sh – a wrapper that internally calls the Python script

How the script is called:

/path/collect_cw_metrics.sh <Destination_AWS_Account ID> <S3_Bucket_AWS_Account_ID> [<AWS_Region>]
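Since the script runs hourly from cron, the crontab entry looks something like this (the account IDs are placeholders and the log redirection is optional):

## 111111111111 = metrics account, 222222222222 = S3 bucket account
0 * * * * /path/collect_cw_metrics.sh 111111111111 222222222222 us-east-2 >> /var/log/collect_cw_metrics.log 2>&1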

Wrapper script – collect_cw_metrics.sh

#!/bin/bash
if [[ $# -lt 2 ]]; then
  echo "Usage: ${0} <AccountID> <S3_Bucket_AccountID>"
  exit 1
fi
NOW=$(date +"%m%d%Y%H%M")
AccountID=${1}
s3_AccountID=${2}
AWS_DEFAULT_REGION=${3} ## Optional 3rd argument: set when the account's default region differs from the CLI server's
csvfile=/tmp/cw-${AccountID}-${NOW}.csv
#
## Reset Env variables
reset_env () {
        unset AWS_SESSION_TOKEN
        unset AWS_DEFAULT_REGION
        unset AWS_SECRET_ACCESS_KEY
        unset AWS_ACCESS_KEY_ID
} #end of reset_env
## Set Env function
assume_role () {
AccountID=${1}
source </path_to_source_env_file/filename> ${AccountID}
}
# Function assume_role ends
assume_role ${AccountID}
if [[ ! -z "$3" ]]; then
        export AWS_DEFAULT_REGION=${3}
fi
#
## Generate CSV file
python <path_of_the_script>/collect_cw_metrics.py ${AccountID} ${csvfile}
##
## Upload generated CSV file to S3
reset_env
assume_role ${s3_AccountID}
echo ${csvfile}
echo "Uploading data file  to S3...."
aws s3 cp ${csvfile} s3://<Bucket_Name>/
reset_env

Main Python script – collect_cw_metrics.py

#!/usr/bin/python
# To correct indentation in the code: autopep8 cw1.py
import sys
import boto3
import logging
import pandas as pd
from datetime import datetime
from datetime import timedelta

AccountID = str(sys.argv[1])
csvfile = str(sys.argv[2])
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# define the connection
client = boto3.client('ec2')
ec2 = boto3.resource('ec2')
cw = boto3.client('cloudwatch')


# Function to get instance Name
def get_instance_name(fid):
    ec2instance = ec2.Instance(fid)
    instancename = ''
    for tags in ec2instance.tags:
        if tags["Key"] == 'Name':
            instancename = tags["Value"]
    return instancename


# Function to get the instance image ID (required dimension for the custom memory datapoints)
def get_instance_imageID(fid):
    rsp = client.describe_instances(InstanceIds=[fid])
    for resv in rsp['Reservations']:
        v_ImageID = resv['Instances'][0]['ImageId']
    return v_ImageID


# Function to get the instance type (required dimension for the custom memory datapoints)
def get_instance_Instype(fid):
    rsp = client.describe_instances(InstanceIds=[fid])
    for resv in rsp['Reservations']:
        v_InstanceType = resv['Instances'][0]['InstanceType']
    return v_InstanceType


# all running EC2 instances.
filters = [{
    'Name': 'instance-state-name',
    'Values': ['running']
}
]

# filter the instances
instances = ec2.instances.filter(Filters=filters)

# locate all running instances
RunningInstances = [instance.id for instance in instances]
# print(RunningInstances)
dnow = datetime.now()
cwdatapointnewlist = []

for instance in instances:
    ec2_name = get_instance_name(instance.id)
    imageid = get_instance_imageID(instance.id)
    instancetype = get_instance_Instype(instance.id)
    cw_response = cw.get_metric_statistics(
        Namespace='AWS/EC2',
        MetricName='CPUUtilization',
        Dimensions=[
            {
                'Name': 'InstanceId',
                'Value': instance.id
            },
        ],
        StartTime=dnow+timedelta(hours=-1),
        EndTime=dnow,
        Period=300,
        Statistics=['Average', 'Minimum', 'Maximum']
    )

    cw_response_mem = cw.get_metric_statistics(
        Namespace='CWAgent',
        MetricName='mem_used_percent',
        Dimensions=[
            {
                'Name': 'InstanceId',
                'Value': instance.id
            },
            {
                'Name': 'ImageId',
                'Value': imageid
            },
            {
                'Name': 'InstanceType',
                'Value': instancetype
            },
        ],
        StartTime=dnow+timedelta(hours=-1),
        EndTime=dnow,
        Period=300,
        Statistics=['Average', 'Minimum', 'Maximum']
    )

    cwdatapoints = cw_response['Datapoints']
    label_CPU = cw_response['Label']
    for item in cwdatapoints:
        item.update({"Label": label_CPU})

    cwdatapoints_mem = cw_response_mem['Datapoints']
    label_mem = cw_response_mem['Label']
    for item in cwdatapoints_mem:
        item.update({"Label": label_mem})

# Add memory datapoints to CPUUtilization Datapoints
    cwdatapoints.extend(cwdatapoints_mem)

    for cwdatapoint in cwdatapoints:
        timestampStr = cwdatapoint['Timestamp'].strftime(
            "%d-%b-%Y %H:%M:%S.%f")
        cwdatapoint['Timestamp'] = timestampStr
        cwdatapoint.update({'Instance Name': ec2_name})
        cwdatapoint.update({'Instance ID': instance.id})
        cwdatapointnewlist.append(cwdatapoint)

df = pd.DataFrame(cwdatapointnewlist)
df.to_csv(csvfile, header=False, index=False)

A sample of the flat file (CSV format) is shown below.
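Each row carries the datapoint fields returned by CloudWatch (Timestamp, Average, Minimum, Maximum, Unit) plus the Label, Instance Name and Instance ID appended by the script. An illustrative row (the values are made up, and the exact column order follows the keys of the first datapoint, so it may differ):

01-Mar-2020 10:05:00.000000,19.08,3.21,45.76,Percent,CPUUtilization,testserver01,i-04a67rqw6c029b82f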
