# Dictionaries complete - up to finding the most common word in some data (a line, a text, a file, etc.)
### Debugging Assignment ###
# code to be fixed:
# Deliberately fragile "before" version: when a line is blank, split() returns
# an empty list and words[0] raises IndexError. The versions below fix this.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        words = line.split()  # split the line into a list of words
        if words[0] != 'From':  # IndexError here when words is empty
            continue
        print(words[2])
# --------
# now after fixing:
# Debugging version: the extra prints show each line and its words, so the
# line that triggers the traceback is visible just before the failure.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        print('LINE: ', line)  # show the raw line being processed
        words = line.split()  # split the line into a list of words
        print('WORDS: ', words)  # show the split result
        # Guardian pattern: skip lines with no words so words[0] is safe.
        if len(words) < 1:
            continue
        if words[0] != 'From':
            print('ignore')
            continue
        # NOTE: words[2] can still fail on a 'From' line with fewer than
        # three words; the stronger guardian further down covers that.
        print(words[2])
# Final shape of a clean code
# Clean version of the guardian fix, with the debugging prints removed.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 1:  # guardian: nothing to inspect on a blank line
            continue
        if words[0] != 'From':
            continue
        print(words[2])
# ------------------ alternative approach to creating a guardian
# Instead of writing a guardian on the split words, we can "protect" the code by skipping empty lines before splitting.
# Debugging version of the "protect" approach: empty lines are skipped
# before split(), and prints show what happens to every line.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        print('LINE: ', line)
        if line == '':  # an empty line would give an empty words list
            print('skipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskip')
            continue
        words = line.split()  # split the line into a list of words
        print('WORDS: ', words)
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])
# Final code
# Clean version of the "protect" approach.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        if line == '':  # protect: skip the empty line before splitting it
            continue
        words = line.split()
        if words[0] != 'From':
            continue
        print(words[2])
# -------------- making guardian a bit stronger
# Guardian approach: making the guardian a bit stronger
# this depends on you judgement and your assumption about the readability of data.
# Stronger guardian: require at least three words, because words[2] is
# read at the end of the loop body.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 3:  # minimum raised from 1 to 3
            continue
        if words[0] != 'From':
            continue
        print(words[2])
# -------------- making guardian in compound statement
# Guardian approach: writing the guardian as a compound statement.
# Remember, the order of the conditions matters: they are evaluated from left to right.
# If the first condition itself blows up, the program still gives a traceback, so be careful.
# Compound guardian: the length check comes first, so words[0] is only
# evaluated when the list is long enough ("or" short-circuits left to right).
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 3 or words[0] != 'From':  # either condition skips the line
            continue
        print(words[2])
##### SUMMARY #####
# As its observed, its not always about a wrong line,
# debugging can be about adding additional code to support the code already written.
# there are multiple ways to fix the issue.
# its always flexible, and depends upon your own capability to write a cleaner, better code.
# Adding a print statement to check upon whats going on is really helpful sometimes.
# upto this point @ 13 July,2020
# POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED #
# ==============================
# ==============================
# ============================== CHAPTER No. 9
# ============================== Python Dictionaries.
# ============================== 4:29:00
# ==============================
# Definition
# - very powerful
# - they are like a little in memory databases
# Collection is defined as:
# a situation where we are going to have a variable like a list or dictionary.
# Non-collection is defined as:
# Most of our variables have one value in them - when we put a new value in them, the old value is overwritten.
# Lists vs Dictionary
# List:
# A linear collection of values that stay in order.
# an organized version of collection of values.
# kind of like Pringles.
# Dictionary:
# A "bag" of values, each with its own label.
# These are messier, where values are not in order.
# Everything has a "key".
# About Dictionaries:
# - Dictionaries are Python's most powerful data collection
# - Dictionaries allows us to do fast database-like operations
# - Dictionaries have different names in different languages:
# - Associative Arrays - Perl / PHP
# - Properties or Map or HashMap - Java
# - Property Bag - C# / .Net
# Associative means that there is association between key and a value.
# whereas in list, there is a position (index-position).
# - Position is less powerful, less flexible
# ----------------
# Lists "index" their entries based on the position in the list
# Dictionaries are like bags - no order
# So we "index" the things we put in "dictionaary" with a lookup-tag.
# A purse is a "bag" of labelled values: every entry is stored under a key.
purse = {}
purse['money'] = 12
purse['candy'] = 3
purse['tissues'] = 75
print(purse)  # {'money': 12, 'candy': 3, 'tissues': 75}
print(purse['candy'])  # 3
purse['candy'] += 2  # update the value stored under an existing key
print(purse['candy'])  # 5
print(purse)  # {'money': 12, 'candy': 5, 'tissues': 75}
print(len(purse))  # 3 -- the dictionary holds three items
# --------------------------
# Comparing Lists and Dictionaries
# Similarity: Dictionaries are like "lists" except that they use "keys" instead of numbers to lookup values.
# Assignment: How we store things in lists and dictionaries, and how we look things up in them?
# creating list, adding values to list and changing (mutation) values in lists through index number (position)
# Lists: values are appended in order and changed through their position.
lst = []
lst.append(3)
lst.append(127)
print(lst)  # [3, 127]
lst[0] = 5  # the list is now [5, 127]
# creating dictionaries, adding values to dictionaries and changing values in dictionaries through keys (tags)
# Dictionaries: values are stored and changed through their key.
ddd = {}
ddd['age'] = 36
ddd['course'] = 'Python'
print(ddd)  # {'age': 36, 'course': 'Python'}
ddd['age'] = 25  # assigning to an existing key replaces its value
print(ddd)  # {'age': 25, 'course': 'Python'}
ddd[3] = 'numeric value'  # a number can be used as a key too
print(ddd)  # {'age': 25, 'course': 'Python', 3: 'numeric value'}
# -------------------------
# Dictionary Literals (Constants) : Literal is a notation for representing a fixed value in source code.
# - Dictionary "literals" use curly braces "{}" and have a list of "key:value" pairs.
# a = {} and a = dict() are same things ........ i.e. {} and dict() are same things.
# both are used to create empty dictionaries.
# you can also create the dictionery and fill it with some data at same time in single line of code
# creating dictionary with data
# A dictionary literal creates the dictionary and fills it in one statement.
jjj = {
    'chuck': 1,
    'fred': 42,
    'jan': 100,
}
print(jjj)  # {'chuck': 1, 'fred': 42, 'jan': 100}
aaa = {}  # empty dictionary written as a literal
print(aaa)  # {}
bbb = dict()  # empty dictionary created by calling dict()
print(bbb)  # {}
# ----------------------
#### COUNTING ####
# Most Common Name?
# One of the common application of dictionaries: Making Histograms i.e. counting the frequency of things.
# Counting by hand: each name is a key and its value is how often it was seen.
ccc = {}
ccc['csev'] = 1  # first sighting of a name starts its count at 1
ccc['cwen'] = 1
print(ccc)  # {'csev': 1, 'cwen': 1}
ccc['cwen'] += 1  # seeing the name again adds 1 to its count
print(ccc)  # {'csev': 1, 'cwen': 2}
# Process explained:
# First you check if this neme is there in the dictionary (collection) already.
# if its already there, you really wanna add 1 to it.
# if you see this the first time, then you stick value 1 to it.
# so you use the name as "key".
# --------------------------
### Dictionary Traceback ###
# its an "error" to refer a key which is not there in the dictionary.
# we can use "in" operator to ckeck if a key is there in dictionary.
# Looking up a key that is not in the dictionary raises KeyError.
# The error is caught and printed here so the rest of the file keeps running.
ccc = dict()
print('csew' in ccc)  # False
try:
    print(ccc['csew'])  # raises KeyError: the key is missing
except KeyError as err:
    print('KeyError:', err)  # without this handler the program would stop here
# by using "in" operator we can avoid T.B.
# after checking if the "key" is not there in the dictionary, put it in.
# if its already there, then add 1 to the value of it.
# -------- Above situation leads to the following code. -----------------
# How to do when we see a new name.
# use of "if-else" statement to add keys into dictionary.
# This is the "histogram" code.
# Histogram with if/else: start a count at 1 for a new name, otherwise add 1.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    if name not in counts:
        counts[name] = 1  # first time this name is seen
    else:
        counts[name] = counts[name] + 1  # name seen before
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}
# Notes:
# - List is the source of data, which is to be analysed.
# - empty dictionary is created and then keys and their respective counts are added into it through "if-else" statement.
# ========================
# ========================
### The "get" Method for dictionaries
# "get" collapses the four lines of code into one line.
# 4 lines means use of "if-else" statement to check and process the existance of a name in a dictionary.
# Relies on `name` and `counts` left over from the histogram snippet above.
# Long form: read the stored count if the key exists, otherwise use 0.
if name in counts:
    x = counts[name]
else:
    x = 0
# "alternatively"
# Short form: get() returns the stored count, or the default 0 if the key is missing.
x = counts.get(name, 0)
# ---------- Same exmple discussed above with "get" solution -----------
# Histogram again, with the if/else collapsed into a single get() call.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    # Idiom: read the current count (0 if the key is new), add 1, store it back.
    counts[name] = counts.get(name, 0) + 1
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}
# Notes:
# 0 is set to be deault value
# in Python, we often need to calculate the frequency of data.
# so this "get-method" is quite useful in Python programming.
# =====================================
# =====================================
# Counting Words #
# Seeing through files instead of just strings
# Counting Pattern
# Count how often each word occurs in one line of text typed by the user.
counts = dict()
print("Enter a line of Text:")  # prompt shown on its own line
line = input('')
words = line.split()
print('Words: ', words)  # the list produced by split()
print('Counting ...')  # heading for the result that follows
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)
# Print one "key value" pair per line, which is easier to read than the raw dict.
for key in counts:
    print(key, counts[key])
# Issue: full stop, case of characters etc are letting the program consider the words seprately, seprately.
# -------------------------------------------
# -------------------------------------------
### Definite Loops and Dictionaries ###
# the ways you can loop through the dictionary: in above cases we saw how we can build dictionary through using loop.
# "for loops" can be written up for dictionaries.
# These will go through all the keys of dictionaries and looks up the values.
# A for loop over a dictionary visits its keys; counts[key] looks up each value.
counts = {'chuck': 1, 'fred': 42, 'jan': 100}
for key in counts:
    print(key, counts[key])
# answer:
# chuck 1
# fred 42
# jan 100
# --------------------------------
# --------------------------------
### Retrieving lists of Keys and Values ###
# you can get a list of keys, values or items (both: key and value) from a dictionary
# following are different methods to get the desired outcome
# - getting keys only
# - getting values only
# The order of keys() and values() always corresponds:
# the n-th key belongs to the n-th value, as long as the dictionary is not changed in between
# (and since Python 3.7 both follow the order in which the entries were inserted).
# - getting both keys and values (i.e. items)
# Four ways to look at the contents of one dictionary.
jjj = {
    'chuck': 1,
    'fred': 42,
    'jan': 100,
}
print(list(jjj))  # ['chuck', 'fred', 'jan'] -- a plain list of the keys
print(jjj.keys())  # dict_keys(['chuck', 'fred', 'jan'])
print(jjj.values())  # dict_values([1, 42, 100])
# items() pairs every key with its value.
print(jjj.items())  # dict_items([('chuck', 1), ('fred', 42), ('jan', 100)])
# This is a list-like view of (key, value) pairs: our first really combined data structure.
# Each pair is a "tuple" (coming soon), e.g. ('chuck', 1)
# ----------------------
# ----------------------
#### BONUS: Two Iteration Variables! ####
# A Python convenience: each (key, value) pair is unpacked into two loop variables.
# We loop through the key-value pairs in a dictionary using *two* iteration variables.
# Each iteration, the first variable is the "key" and the second variable is the corresponding "value" for the key.
# here two iteration variables and ".items()" are basically related. else you can't get desired outcome.
# items() yields (key, value) pairs; the loop unpacks each pair into aaa and bbb.
jjj = {'chuck': 1, 'fred': 42, 'jan': 100}
for aaa, bbb in jjj.items():
    print(aaa, bbb)
print('\n')  # blank lines between the three listings
# getting only keys
for aaa, bbb in jjj.items():
    print(aaa)
print('\n')
# getting only values
for aaa, bbb in jjj.items():
    print(bbb)
# ----------------------------
# ----------------------------
# ----------------------------
##### the example of code shown at very early stage of lectures.
# Count how often each word occurs in a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
with open(name) as handle:  # the with-block closes the file when done
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1
print(counts)
# Print the word frequencies in a tidier, one-per-line layout.
for key in counts:
    print(key, '\t\t', counts[key])
# -----------------------------
# -----------------------------
# Build a word histogram for a file, then find the most frequent word.
name = input('Enter File Name: ')
counts = dict()
with open(name) as handle:  # the with-block closes the file when done
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1  # builds the histogram
# Maximum loop: remember the largest count seen so far and its word.
big_count = None
big_word = None
for word, count in counts.items():  # two iteration variables: key and value
    # The first pair always wins (big_count is None); later ones must beat it.
    if big_count is None or count > big_count:
        big_word = word
        big_count = count
print(big_word, big_count)  # sample output from the notes: jan 352
# SubhanAllah #
# =================================
# -------------------------- CHAPTER No. 9
# -------------------------- Dictionaries - Counting Word Frequency using a Dictionary
# =================================
# Step 1:
# writing initial code while testing it at the same time, and finalizing it if its working properly.
# Assignment : word count
# Step 1: read the file line by line and show each line and its words.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()  # remove whitespace from the right
        print(lin)
        wds = lin.split()  # split the line into words: string -> list
        print(wds)
# ----------- # step 2, moving further
# ----------- # step 2, moving further
# Assignment : word count
# Step 2: visit every word of every line and print each on its own line.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()  # remove whitespace from the right
        # print(lin)
        wds = lin.split()  # split the line into words: string -> list
        # print(wds)
        for w in wds:  # the nested loop reaches every word of every line
            print(w)
# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in
# Assignment : word count
# Step 3: count the words with a dictionary, printing what happens to each one.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()  # the counter: each word is a key, its value is the count
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            print(w)  # the word, before its count is updated
            if w in di:  # the word is already in the dictionary
                di[w] = di[w] + 1
                print('**Existing**')
            else:  # the word appears for the first time
                di[w] = 1
                print('**New**')
            print(di[w])  # the count after the update
            print('\n')
print(di)
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# Assignment : word count
# Step 4: introduce get(); the counting itself is still the four-line if/else.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # get() returns the current count, or -99 if the word is not stored yet.
            print('**', w, di.get(w, -99))
            # These four lines can be replaced by one get() call (see the later steps).
            if w in di:
                di[w] = di[w] + 1
            else:
                di[w] = 1
            # print(w, di[w])
print(di)
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# Assignment : word count
# Step 5: count with get(), spelled out as old count -> new count.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            oldcount = di.get(w, 0)  # current count; 0 if the key is not there yet
            print(w, 'old', oldcount)
            newcount = oldcount + 1  # one more sighting of this word
            di[w] = newcount
            print(w, 'new', newcount)
print(di)
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# Assignment : word count
# Step 6: the same counting, reduced to the one-line get() idiom.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
            print(w, 'new', di[w])
print(di)
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# Assignment : word count
# Step 7: build the word counts, then find the most common word.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
# print(di)
# Once the dictionary has been printed and verified, the code above can be trusted.
# Now find the most common word with a maximum loop.
largest = -1  # safe starting value only because every count is at least 1
theword = None
for k, v in di.items():  # items() gives key-value pairs, hence two loop variables
    print(k, v)
    if v > largest:
        largest = v
        theword = k  # remember the key that had the largest count
print('Done', theword, largest)
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# Assignment : word count
# Step 8: the finished program, without the debugging output.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'  # default file when the user just presses Enter
di = dict()
with open(fname) as hand:  # the with-block closes the file when done
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
# print(di)
# Maximum loop: keep the largest count seen so far and the word it belongs to.
largest = -1  # every real count is at least 1, so the first word always wins
theword = None
for k, v in di.items():
    if v > largest:
        largest = v
        theword = k
print(theword, largest)
# ===========================================
# ===========================================
# ===========================================
# ===========================================
######## CHAPTER 10
######## TUPLES
######## 5:23:06
# ============================================
# ============================================
# ============================================
# ============================================
### Debugging Assignment ###
# code to be fixed:
# Deliberately fragile "before" version: when a line is blank, split() returns
# an empty list and words[0] raises IndexError. The versions below fix this.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        words = line.split()  # split the line into a list of words
        if words[0] != 'From':  # IndexError here when words is empty
            continue
        print(words[2])
# --------
# now after fixing:
# Debugging version: the extra prints show each line and its words, so the
# line that triggers the traceback is visible just before the failure.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        print('LINE: ', line)  # show the raw line being processed
        words = line.split()  # split the line into a list of words
        print('WORDS: ', words)  # show the split result
        # Guardian pattern: skip lines with no words so words[0] is safe.
        if len(words) < 1:
            continue
        if words[0] != 'From':
            print('ignore')
            continue
        # NOTE: words[2] can still fail on a 'From' line with fewer than
        # three words; the stronger guardian further down covers that.
        print(words[2])
# Final shape of a clean code
# Clean version of the guardian fix, with the debugging prints removed.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 1:  # guardian: nothing to inspect on a blank line
            continue
        if words[0] != 'From':
            continue
        print(words[2])
# ------------------ alternative approach to creating a guardian
# Instead of writing a guardian on the split words, we can "protect" the code by skipping empty lines before splitting.
# Debugging version of the "protect" approach: empty lines are skipped
# before split(), and prints show what happens to every line.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()  # remove trailing whitespace
        print('LINE: ', line)
        if line == '':  # an empty line would give an empty words list
            print('skipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskip')
            continue
        words = line.split()  # split the line into a list of words
        print('WORDS: ', words)
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])
# Final code
# Clean version of the "protect" approach.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        if line == '':  # protect: skip the empty line before splitting it
            continue
        words = line.split()
        if words[0] != 'From':
            continue
        print(words[2])
# -------------- making guardian a bit stronger
# Guardian approach: making the guardian a bit stronger
# this depends on you judgement and your assumption about the readability of data.
# Stronger guardian: require at least three words, because words[2] is
# read at the end of the loop body.
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 3:  # minimum raised from 1 to 3
            continue
        if words[0] != 'From':
            continue
        print(words[2])
# -------------- making guardian in compound statement
# Guardian approach: writing the guardian as a compound statement.
# Remember, the order of the conditions matters: they are evaluated from left to right.
# If the first condition itself blows up, the program still gives a traceback, so be careful.
# Compound guardian: the length check comes first, so words[0] is only
# evaluated when the list is long enough ("or" short-circuits left to right).
with open('emaildata') as han:  # the with-block closes the file when done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 3 or words[0] != 'From':  # either condition skips the line
            continue
        print(words[2])
##### SUMMARY #####
# As its observed, its not always about a wrong line,
# debugging can be about adding additional code to support the code already written.
# there are multiple ways to fix the issue.
# its always flexible, and depends upon your own capability to write a cleaner, better code.
# Adding a print statement to check upon whats going on is really helpful sometimes.
# upto this point @ 13 July,2020
# POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED #
# ==============================
# ==============================
# ============================== CHAPTER No. 9
# ============================== Python Dictionaries.
# ============================== 4:29:00
# ==============================
# Definition
# - very powerful
# - they are like a little in memory databases
# Collection is defined as:
# a situation where we are going to have a variable like a list or dictionary.
# Non-collection is defined as:
# Most of our variables have one value in them - when we put a new value in them, the old value is overwritten.
# Lists vs Dictionary
# List:
# A linear collection of values that stay in order.
# an organized version of collection of values.
# kind of like Pringles.
# Dictionary:
# A "bag" of values, each with its own label.
# These are messier, where values are not in order.
# Everything has a "key".
# About Dictionaries:
# - Dictionaries are Python's most powerful data collection
# - Dictionaries allows us to do fast database-like operations
# - Dictionaries have different names in different languages:
# - Associative Arrays - Perl / PHP
# - Properties or Map or HashMap - Java
# - Property Bag - C# / .Net
# Associative means that there is association between key and a value.
# whereas in list, there is a position (index-position).
# - Position is less powerful, less flexible
# ----------------
# Lists "index" their entries based on the position in the list
# Dictionaries are like bags - no order
# So we "index" the things we put in "dictionaary" with a lookup-tag.
# A purse is a "bag" of labelled values: every entry is stored under a key.
purse = {}
purse['money'] = 12
purse['candy'] = 3
purse['tissues'] = 75
print(purse)  # {'money': 12, 'candy': 3, 'tissues': 75}
print(purse['candy'])  # 3
purse['candy'] += 2  # update the value stored under an existing key
print(purse['candy'])  # 5
print(purse)  # {'money': 12, 'candy': 5, 'tissues': 75}
print(len(purse))  # 3 -- the dictionary holds three items
# --------------------------
# Comparing Lists and Dictionaries
# Similarity: Dictionaries are like "lists" except that they use "keys" instead of numbers to lookup values.
# Assignment: How we store things in lists and dictionaries, and how we look things up in them?
# creating list, adding values to list and changing (mutation) values in lists through index number (position)
# Lists: values are appended in order and changed through their position.
lst = []
lst.append(3)
lst.append(127)
print(lst)  # [3, 127]
lst[0] = 5  # the list is now [5, 127]
# creating dictionaries, adding values to dictionaries and changing values in dictionaries through keys (tags)
# Dictionaries: values are stored and changed through their key.
ddd = {}
ddd['age'] = 36
ddd['course'] = 'Python'
print(ddd)  # {'age': 36, 'course': 'Python'}
ddd['age'] = 25  # assigning to an existing key replaces its value
print(ddd)  # {'age': 25, 'course': 'Python'}
ddd[3] = 'numeric value'  # a number can be used as a key too
print(ddd)  # {'age': 25, 'course': 'Python', 3: 'numeric value'}
# -------------------------
# Dictionary Literals (Constants) : Literal is a notation for representing a fixed value in source code.
# - Dictionary "literals" use curly braces "{}" and have a list of "key:value" pairs.
# a = {} and a = dict() are same things ........ i.e. {} and dict() are same things.
# both are used to create empty dictionaries.
# you can also create the dictionery and fill it with some data at same time in single line of code
# creating dictionary with data
# A dictionary literal creates the dictionary and fills it in one statement.
jjj = {
    'chuck': 1,
    'fred': 42,
    'jan': 100,
}
print(jjj)  # {'chuck': 1, 'fred': 42, 'jan': 100}
aaa = {}  # empty dictionary written as a literal
print(aaa)  # {}
bbb = dict()  # empty dictionary created by calling dict()
print(bbb)  # {}
# ----------------------
#### COUNTING ####
# Most Common Name?
# One of the common application of dictionaries: Making Histograms i.e. counting the frequency of things.
# Counting by hand: each name is a key and its value is how often it was seen.
ccc = {}
ccc['csev'] = 1  # first sighting of a name starts its count at 1
ccc['cwen'] = 1
print(ccc)  # {'csev': 1, 'cwen': 1}
ccc['cwen'] += 1  # seeing the name again adds 1 to its count
print(ccc)  # {'csev': 1, 'cwen': 2}
# Process explained:
# First you check if this neme is there in the dictionary (collection) already.
# if its already there, you really wanna add 1 to it.
# if you see this the first time, then you stick value 1 to it.
# so you use the name as "key".
# --------------------------
### Dictionary Traceback ###
# its an "error" to refer a key which is not there in the dictionary.
# we can use "in" operator to ckeck if a key is there in dictionary.
# Looking up a key that is not in the dictionary raises KeyError.
# The error is caught and printed here so the rest of the file keeps running.
ccc = dict()
print('csew' in ccc)  # False
try:
    print(ccc['csew'])  # raises KeyError: the key is missing
except KeyError as err:
    print('KeyError:', err)  # without this handler the program would stop here
# by using "in" operator we can avoid T.B.
# after checking if the "key" is not there in the dictionary, put it in.
# if its already there, then add 1 to the value of it.
# -------- Above situation leads to the following code. -----------------
# How to do when we see a new name.
# use of "if-else" statement to add keys into dictionary.
# This is the "histogram" code.
# Histogram with if/else: start a count at 1 for a new name, otherwise add 1.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    if name not in counts:
        counts[name] = 1  # first time this name is seen
    else:
        counts[name] = counts[name] + 1  # name seen before
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}
# Notes:
# - List is the source of data, which is to be analysed.
# - empty dictionary is created and then keys and their respective counts are added into it through "if-else" statement.
# ========================
# ========================
### The "get" Method for dictionaries
# "get" collapses the four lines of code into one line.
# 4 lines means use of "if-else" statement to check and process the existance of a name in a dictionary.
# Relies on `name` and `counts` left over from the histogram snippet above.
# Long form: read the stored count if the key exists, otherwise use 0.
if name in counts:
    x = counts[name]
else:
    x = 0
# "alternatively"
# Short form: get() returns the stored count, or the default 0 if the key is missing.
x = counts.get(name, 0)
# ---------- Same exmple discussed above with "get" solution -----------
# Histogram again, with the if/else collapsed into a single get() call.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    # Idiom: read the current count (0 if the key is new), add 1, store it back.
    counts[name] = counts.get(name, 0) + 1
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}
# Notes:
# 0 is set to be deault value
# in Python, we often need to calculate the frequency of data.
# so this "get-method" is quite useful in Python programming.
# =====================================
# =====================================
# Counting Words #
# Seeing through files instead of just strings
# Counting Pattern
# Count how often each word occurs in one line of text typed by the user.
counts = dict()
print("Enter a line of Text:")  # prompt shown on its own line
line = input('')
words = line.split()
print('Words: ', words)  # the list produced by split()
print('Counting ...')  # heading for the result that follows
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)
# Print one "key value" pair per line, which is easier to read than the raw dict.
for key in counts:
    print(key, counts[key])
# Issue: full stop, case of characters etc are letting the program consider the words seprately, seprately.
# -------------------------------------------
# -------------------------------------------
### Definite Loops and Dictionaries ###
# the ways you can loop through the dictionary: in above cases we saw how we can build dictionary through using loop.
# "for loops" can be written up for dictionaries.
# These will go through all the keys of dictionaries and looks up the values.
# A for loop over a dictionary visits its keys; counts[key] looks up each value.
counts = {'chuck': 1, 'fred': 42, 'jan': 100}
for key in counts:
    print(key, counts[key])
# answer:
# chuck 1
# fred 42
# jan 100
# --------------------------------
# --------------------------------
### Retrieving lists of Keys and Values ###
# you can get a list of keys, values or items (both: key and value) from a dictionary
# following are different methods to get the desired outcome
# - getting keys only
# - getting values only
# The order of keys() and values() always corresponds:
# the n-th key belongs to the n-th value, as long as the dictionary is not changed in between
# (and since Python 3.7 both follow the order in which the entries were inserted).
# - getting both keys and values (i.e. items)
# Four ways to look at the contents of one dictionary.
jjj = {
    'chuck': 1,
    'fred': 42,
    'jan': 100,
}
print(list(jjj))  # ['chuck', 'fred', 'jan'] -- a plain list of the keys
print(jjj.keys())  # dict_keys(['chuck', 'fred', 'jan'])
print(jjj.values())  # dict_values([1, 42, 100])
# items() pairs every key with its value.
print(jjj.items())  # dict_items([('chuck', 1), ('fred', 42), ('jan', 100)])
# This is a list-like view of (key, value) pairs: it is OUR FIRST REALLY combined DATA-STRUCTURE.
# Each pair is a "tuple" (coming soon), e.g. -- ('chuck', 1)
# ----------------------
# ----------------------
#### BONUS: Two Iteration Variables! ####
# a convenient Python feature: the for statement can unpack each item into several variables.
# We loop through the key-value pairs in a dictionary using *two* iteration variables.
# Each iteration, the first variable is the "key" and the second variable is the corresponding "value" for the key.
# Here the two iteration variables and ".items()" belong together: without ".items()" the loop yields only keys, so you can't get the desired outcome.
# Loop over (key, value) pairs using two iteration variables.
jjj = {'chuck': 1, 'fred': 42, 'jan': 100}
for aaa, bbb in jjj.items():
    print(aaa, bbb)  # key and its value
print('\n')  # blank separator between the demos
# getting only keys
for aaa, bbb in jjj.items():
    print(aaa)
print('\n')
# getting only values
for aaa, bbb in jjj.items():
    print(bbb)
# ----------------------------
# ----------------------------
# ----------------------------
##### the example of code shown at very early stage of lectures.
# Build a word-frequency histogram for a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
# "with" closes the file automatically once all lines have been read.
with open(name) as handle:
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1
print(counts)
# code for just getting organised output of words frequency
for key in counts:
    print(key, '\t\t', counts[key])
# -----------------------------
# -----------------------------
# Find the most frequent word in a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
# "with" closes the file automatically once all lines have been read.
with open(name) as handle:
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1  # this will build histogram
# now for getting the maximum counted item
big_count = None
big_word = None
for word, count in counts.items():  # using Two Iteration Variable Method
    # conditional statement is based on frequency (value);
    # the None check lets the very first pair become the initial maximum
    if big_count is None or count > big_count:
        big_word = word
        big_count = count
print(big_word, big_count)  # e.g. jan 352
# SubhanAllah #
# =================================
# -------------------------- CHAPTER No. 9
# -------------------------- Dictionaries - Counting Word Frequency using a Dictionary
# =================================
# Step 1:
# writing initial code while testing it at the same time, and finalizing it if its working properly.
# Assignment : word count
# Step 1: read the file line by line and show each line and its words.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()  # removing white-spaces from right
        print(lin)
        wds = lin.split()  # spliting the line into words i.e. from string to list.
        print(wds)
# ----------- # step 2, moving further
# ----------- # step 2, moving further
# Assignment : word count
# Step 2: print every word of the file on its own line.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()  # removing white-spaces from right
        # print(lin)
        wds = lin.split()  # spliting the line into words i.e. from string to list.
        # print(wds)
        for w in wds:  # for printing each word in next line.
            print(w)  # it just hit all of the line, all of the words
# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in
# Assignment : word count
# Step 3: count the words with a dictionary, tracing every update.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()  # the key thing about dictionary is, we are gonna make a counter.
# and here words will be used as index.
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            print(w)  # word at start
            if w in di:  # this will work if the word is already in the dictionary
                di[w] = di[w] + 1
                print('**Existing**')
            else:  # this will work if the word is appearing first time in dictionary
                di[w] = 1
                print('**New**')
            print(di[w])  # count at the end
            print('\n')  # blank separator after each word's trace
print(di)
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# Assignment : word count
# Step 4: introduce dict.get() to peek at a counter that may not exist yet.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # di.get(w, -99) is the important part here:
            # it returns -99 instead of failing when w is not a key yet.
            print('**', w, di.get(w, -99))
            if w in di:                # This
                di[w] = di[w] + 1      # Code
            else:                      # of four lines
                di[w] = 1              # here
            # Alternatively we have option to do the same function through one line of code only.
            # print(w, di[w])
print(di)
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# Assignment : word count
# Step 5: use dict.get() with a default of 0 to compute the new count.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # if the key is not there the count is zero
            oldcount = di.get(w, 0)  # look up the old count; if there is none, use 0.
            print(w, 'old', oldcount)
            newcount = oldcount + 1  # add 1 to the old count; this becomes the new count.
            di[w] = newcount
            print(w, 'new', newcount)
print(di)
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# Assignment : word count
# Step 6: the same counter update collapsed into a single line.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
            print(w, 'new', di[w])
print(di)
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# Assignment : word count
# Step 7: count the words, then find the most common one (with tracing).
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
# print(di)
# once we have printed this dictionary and verified that it is right, don't worry too much about the code above
# now finding most common word
# this is like a maximum-loop
# Starting from -1 is normally a risky assumption, but every count here is
# at least 1, so the first pair always replaces it.
largest = -1
theword = None
# ".items()" is a method of all dictionaries that gives key-value pairs,
# which is why we need two iteration variables.
for k, v in di.items():
    print(k, v)
    if v > largest:
        largest = v
        theword = k  # capture/remember the "key" that was largest
print('Done', theword, largest)
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# Assignment : word count
# Step 8: the cleaned-up program - count the words and print the most common one.
fname = input('Enter File: ')
if len(fname) < 1:  # just pressing Enter falls back to the default file
    fname = 'clown.txt'
di = dict()
# "with" closes the file automatically once all lines have been read.
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
# print(di)
# once we have printed this dictionary and verified that it is right, don't worry too much about the code above
# now finding most common word
# this is like a maximum-loop
largest = -1  # every real count is at least 1, so any word beats this start value
theword = None
for k, v in di.items():
    if v > largest:
        largest = v
        theword = k
print(theword, largest)
# ===========================================
# ===========================================
# ===========================================
# ===========================================
######## CHAPTER 10
######## TUPLES
######## 5:23:06
# ============================================
# ============================================
# ============================================
# ============================================
Comments
Post a Comment