CSV Data Processing in Python

You need to read a CSV file which consists of the date, the mean, max and min temperature, the amount of rainfall and the amount of snowfall. You need to write a function to sort the data (you can’t use the built-in sort, and you can’t use bubble sort). You need a method to find the rainfall for a particular date (you can’t use the built-in search methods). You need methods to find the minimum and maximum for a date (again without using the built-in methods). For more Python programming assignment help, contact us for details.


# This code is a starting point for your assignment 5.
# You have no need to change much of the main function or any of the supplied complete functions.
# Function headers for the ones you must write are supplied, as are their doc strings.

def readData(filename):
    '''Reads the weather data from the supplied filename. The function returns a list of
    dictionaries, where each dictionary consists of the data for a particular month.

    The first line of the file is treated as a header and is skipped. Every other
    non-blank line must hold at least seven comma-separated values, in this order:
    year, month, mean temperature, max temperature, min temperature, rain, snow.

    Each returned dictionary has the keys 'year', 'month', 'yearmonth', 'meanT',
    'maxT', 'minT', 'rain' and 'snow'.

    Raises ValueError if a field cannot be converted to a number, and IndexError
    if a line has fewer than seven fields.'''
    allData = []
    # The with-statement guarantees the file is closed, even if a line fails to parse.
    with open(filename, 'r') as fileIn:
        fileIn.readline()  # Discard the header line.
        for line in fileIn:
            line = line.strip()
            if line == "":
                # Blank lines are skipped rather than ending the read early.
                continue
            values = line.split(',')
            year = int(values[0])
            month = int(values[1])
            monthData = {}
            monthData['year'] = year
            monthData['month'] = month
            # yearmonth is a convenience key that is calculated
            monthData['yearmonth'] = year * 100 + month
            monthData['meanT'] = float(values[2])
            monthData['maxT'] = float(values[3])
            monthData['minT'] = float(values[4])
            monthData['rain'] = float(values[5])
            monthData['snow'] = float(values[6])
            allData.append(monthData)
    return allData

def showSome(allData):
    '''A convenience function that prints the beginning and end portions of the supplied list.

    Prints the first 13 items followed by the last 10 items, one item per line.
    If the list is shorter than that, only the items that exist are printed (the two
    portions may then overlap), instead of failing with an IndexError.'''
    # Slices are used instead of fixed index ranges so short lists are handled safely.
    for record in allData[:13]:
        print(record)
    for record in allData[-10:]:
        print(record)

def getInt(prompt, lowLimit=None, highLimit=None):
    '''A robust function that is sure to return an int value between the two
    supplied limits.

    prompt is the text shown to the user. lowLimit and highLimit are inclusive bounds;
    either may be None, in which case that bound is not checked. The user is asked
    again until a legal integer is entered.'''
    while True:
        # Only the conversion sits in the try block, so nothing else can be
        # mistaken for a bad entry.
        try:
            userNum = int(input(prompt))
        except ValueError:
            print("Your entry is not a valid integer, please try again.")
            continue
        if lowLimit is not None and userNum < lowLimit:
            print("The number must be higher than", lowLimit)
            print("Please try again.")
        elif highLimit is not None and userNum > highLimit:
            print("The number must be lower than", highLimit)
            print("Please try again.")
        else:
            # The value passed every check that applies.
            return userNum

def insertionSort(allData, key):
    '''Sorts the supplied list of dictionaries in situ into increasing order
    by the key name supplied.

    allData is modified in place and nothing is returned. Records whose values for
    key are equal keep their original relative order, because a record is only
    moved past records with a strictly greater value.'''
    # Insertion sort is used as the assignment forbids bubble sort and the
    # built-in Python sorts.
    for i in range(1, len(allData)):
        current = allData[i]
        currentValue = current[key]
        # Shift every record in allData[0..i-1] that is greater than the current
        # record one position to the right, opening a gap for the current record.
        j = i - 1
        while j >= 0 and currentValue < allData[j][key]:
            allData[j + 1] = allData[j]
            j -= 1
        allData[j + 1] = current

def findRain(allData, target):
    '''Uses a binary search to locate rainfall amounts in mm from the supplied list of
    dictionaries. target is a date in the 'yearmonth' value format. The function assumes
    that the list has been sorted by increasing date. The function will raise a ValueError
    exception if the year and month in target do not exist in allData.'''
    # A binary search is required; no built-in searches are used.
    low = 0
    high = len(allData) - 1
    while low <= high:
        middle = (low + high) // 2
        current = allData[middle]['yearmonth']
        if current == target:
            return allData[middle]['rain']
        if current < target:
            # The target can only be in the upper half.
            low = middle + 1
        else:
            # The target can only be in the lower half.
            high = middle - 1
    # The search range is empty, so the date is not in the list.
    raise ValueError(f"No data found for year and month {target}.")

def findMax(allData, key):
    '''Returns the record from allData that has the maximum value for the supplied key.

    The list does not need to be sorted. If several records share the maximum value,
    the first of them in the list is returned. An empty list raises IndexError.'''
    # A plain linear scan is used; no built-in searches or the max() BIF.
    best = allData[0]
    for record in allData:
        if record[key] > best[key]:
            best = record
    return best

def findMin(allData, key):
    '''Returns the record from allData that has the minimum value for the supplied key.

    The list does not need to be sorted. If several records share the minimum value,
    the first of them in the list is returned. An empty list raises IndexError.'''
    # A plain linear scan is used; no built-in searches or the min() BIF.
    best = allData[0]
    for record in allData:
        if record[key] < best[key]:
            best = record
    return best

def getAnnualSnow(allData):
    '''This function returns a list of dictionaries which consist of the total
    snowfall for every year listed in allData. Each record will consist of
    {'year' : ####, 'totalsnow' : ###.#}, where # is a number. There will be one record per year.
    It does not matter if any month records are missing, the total snowfall is still calculated, by
    assuming zero snow for the missing months.

    The records are returned in increasing order of year and cover every year from the
    earliest to the latest year found in allData, so a year with no month records at all
    gets a total of zero. allData does not need to be sorted. An empty list gives an
    empty result.'''
    if not allData:
        return []

    # Find the range of years with a scan, so the input order does not matter.
    firstYear = allData[0]['year']
    lastYear = firstYear
    for record in allData:
        year = record['year']
        if year < firstYear:
            firstYear = year
        elif year > lastYear:
            lastYear = year

    # One record per year, starting from zero snow.
    data = []
    for year in range(firstYear, lastYear + 1):
        data.append({'year': year, 'totalsnow': 0.0})

    # The years are consecutive, so a year's record sits at index (year - firstYear).
    for record in allData:
        data[record['year'] - firstYear]['totalsnow'] += record['snow']

    return data

def saveAnnualMeanTemp(allData, filename):
    '''This function calculates the mean temperatures for an entire year and saves this
    data to the supplied file - one line in the file per year.

    The yearly value is the average of the 'meanT' values of that year's records, so a
    year with fewer than 12 month records is averaged over the records that exist.
    allData is sorted in place by 'yearmonth' as a side effect. If allData is empty,
    an empty file is written.'''
    # Sorting by date groups all the records of one year together.
    insertionSort(allData, 'yearmonth')
    # The with-statement guarantees the file is closed once writing is finished.
    with open(filename, "w") as f:
        if not allData:
            return
        year = allData[0]['year']
        total = 0
        count = 0
        for record in allData:
            if record['year'] != year:
                # A new year has started: write out the year just completed
                # and restart the running totals.
                f.write('Year = {} and Temp = {}\n'.format(year, total / count))
                year = record['year']
                total = 0
                count = 0
            total = total + record['meanT']
            count = count + 1
        # The last year is not followed by a change of year, so write it here.
        f.write('Year = {} and Temp = {}\n'.format(year, total / count))

def main():
    '''Reads the weather data, reports the lowest, median and highest monthly mean
    temperatures, and saves the annual mean temperatures to "YearMeans.txt".

    The data file "TorontoWeatherData.csv" is looked up relative to the current
    working directory.'''
    # Read the data. A relative path is used so the program does not depend on
    # one particular user's folders.
    db = readData("TorontoWeatherData.csv")

    # Un-comment these lines to make sure your sort has worked properly.
    # print("Before sorting, as read from file:")
    # showSome(db)
    insertionSort(db, 'yearmonth')
    # print("\nAfter sorting by date:")
    # showSome(db)

    # Test your binary search by searching for the rainfall amount for a user-
    # supplied year and month.
    # searchYear = getInt("Enter year for rainfall search: ", 1938, 2018)
    # searchMonth = getInt("Enter month for rainfall search: ", 1, 12)
    # searchYearMonth = 100 * searchYear + searchMonth
    # try:
    #     rainfall = findRain(db, searchYearMonth)
    #     print(f"Rainfall was {rainfall} mm.")
    # except ValueError as message:
    #     print(message)

    # Test your findMax and findMin functions by locating some extremes.
    # These two functions return a single record which is a dictionary.
    # maxR = findMax(db, 'maxT')
    # print(f"\nHighest temperature {maxR['maxT']} deg C, in month {maxR['month']}, {maxR['year']}.")
    # minR = findMin(db, 'minT')
    # print(f"Lowest temperature {minR['minT']} deg C, in month {minR['month']}, {minR['year']}.")
    # maxR = findMax(db, 'rain')
    # print(f"Highest rainfall {maxR['rain']} mm, in month {maxR['month']}, {maxR['year']}.")
    # maxR = findMax(db, 'snow')
    # print(f"Highest snowfall {maxR['snow']} cm, in month {maxR['month']}, {maxR['year']}.")

    # annualSnow is a list of dictionaries, where each dictionary holds the year and the total snowfall
    # for that year.
    # annualSnow = getAnnualSnow(db)
    # insertionSort(annualSnow, 'totalsnow')
    # minR = annualSnow[0]
    # print(f"\nLowest annual snowfall {minR['totalsnow']} cm, in {minR['year']}.")
    # medR = annualSnow[len(annualSnow) // 2]
    # print(f"Median annual snowfall {medR['totalsnow']} cm.")
    # maxR = annualSnow[len(annualSnow) - 1]
    # print(f"Highest annual snowfall {maxR['totalsnow']} cm, in {maxR['year']}.")

    # Sort your data again, by mean temperature this time. This is the only way you can get the median
    # value, which is defined as the middle of a sorted set of values.
    insertionSort(db, 'meanT')
    minR = db[0]
    print(f"\nLowest mean temperature {minR['meanT']} deg C, in month {minR['month']}, {minR['year']}.")
    medR = db[len(db) // 2]
    print(f"Median mean temperature {medR['meanT']} deg C.")
    maxR = db[-1]
    print(f"Highest mean temperature {maxR['meanT']} deg C, in month {maxR['month']}, {maxR['year']}.")

    # Look for Global Warming!
    saveAnnualMeanTemp(db, "YearMeans.txt")