# Dictionaries complete - up to finding the most common word in data (a line, a text, a file, etc.)

### Debugging Assignment ###

# code to be fixed:

# Exhibit for the debugging exercise: print the third word of every line
# that starts with 'From'. It is intentionally left unguarded.
han = open('emaildata')
for line in han:
    line = line.rstrip()    # remove trailing whitespace (including the newline)
    words = line.split()    # split the line into a list of words
    # On a blank line `words` is an empty list, so words[0] raises
    # IndexError -- this is the traceback (T.B.) the exercise is about.
    if words[0] != 'From':
        continue
    print(words[2])
# --------
    # now after fixing:

# Debug version: the extra prints show every line and its word list, so we
# can see exactly where the traceback (T.B.) happens.
with open('emaildata') as han:          # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()            # remove trailing whitespace
        print('LINE: ', line)           # show each line before it is processed
        words = line.split()            # split the line into words
        print('WORDS: ', words)         # show the word list to spot where the program breaks
        # Guardian pattern: a blank line gives an empty list, so words[0]
        # below would raise IndexError -- skip such lines first.
        if len(words) < 1:
            continue
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])

# Final shape of a clean code

# Clean version with the guardian: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 1:          # guardian: skip blank lines (empty word list)
            continue
        if words[0] != 'From':
            continue
        print(words[2])

# ------------------ alternative approach to creating a guardian

# As an alternative to writing a guardian, we can "protect" the code: skip blank lines before they are split.

# Debug version of the "protect" approach: blank lines are skipped before
# the line is split, so words[0] is never evaluated on an empty list.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()        # remove trailing whitespace
        print('LINE: ', line)
        if line == '':              # blank line: nothing to split, skip it
            print('skipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskip')
            continue
        words = line.split()        # split the line into words
        print('WORDS: ', words)
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])

# Final code
# Final "protect" version: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        if line == '':              # protecting approach: skip the empty line before splitting
            continue
        words = line.split()
        if words[0] != 'From':
            continue
        print(words[2])


# -------------- making guardian a bit stronger
# Guardian approach: making the guardian a bit stronger
# this depends on you judgement and your assumption about the readability of data.


# Stronger guardian: print(words[2]) needs at least three words, so lines
# with fewer than three words are skipped.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        # guardian a bit stronger
        if len(words) < 3:          # minimum number of words raised from 1 to 3
            continue
        if words[0] != 'From':
            continue
        print(words[2])

# -------------- making guardian in compound statement

# Guardian approach: creating code for guradian in a compound statement.
# Remamber, the order of code is really important from left to right.
    # if first condition blows-up, the programme will give T.B. so be careful.
# Guardian as one compound condition: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        # Order matters: `or` evaluates left to right and stops at the first
        # true condition, so words[0] is only read when len(words) >= 3.
        if len(words) < 3 or words[0] != 'From':
            continue
        print(words[2])

##### SUMMARY #####

# As its observed, its not always about a wrong line,
    # debugging can be about adding additional code to support the code already written.
    # there are multiple ways to fix the issue.
        # its always flexible, and depends upon your own capability to write a cleaner, better code.
# Adding a print statement to check upon whats going on is really helpful sometimes.

 # upto this point @ 13 July,2020
 # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED #


# ==============================
# ==============================
# ==============================                CHAPTER No. 9
# ==============================                    Python Dictionaries.
# ==============================                                            4:29:00
# ==============================

    # Definition
        # - very powerful
        # - they are like a little in memory databases
    # Collection is defined as:
        # a situation where we are going to have a variable like a list or dictionary.

    # Non-collection is defined as:
        # Most of our variables have one value in them - when we put a new value in them, the old value is overwritten.

# Lists vs Dictionary
    # List:
        # A linear collection of values that stay in order.
        # an organized version of collection of values.
        # kind of like Pringles.

    # Dictionary:
        # A "bag" of values, each with its own label.
        # These are messier: values are found by their key, not by position (since Python 3.7 a dict does keep insertion order).
        # Everything has a "key".
# About Dictionaries:
    # - Dictionaries are Python's most powerful data collection
    # - Dictionaries allows us to do fast database-like operations
    # - Dictionaries have different names in different languages:
        # - Associative Arrays              -   Perl / PHP
        # - Properties or Map or HashMap    -   Java
        # - Property Bag                    -   C# / .Net

            # Associative means that there is association between key and a value.
            # whereas in list, there is a position (index-position).
                # - Position is less powerful, less flexible

# ----------------

# Lists "index" their entries based on the position in the list
# Dictionaries are like bags - entries are found by key, not by position
# So we "index" the things we put in a "dictionary" with a lookup-tag.

# Build a small dictionary, read one entry, update it and measure its size.
purse = {}
purse['money'] = 12
purse['candy'] = 3
purse['tissues'] = 75
print(purse)                                # {'money': 12, 'candy': 3, 'tissues': 75}
print(purse['candy'])                       # 3
purse['candy'] += 2                         # add 2 to the value stored under 'candy'
print(purse['candy'])                       # 5
print(purse)                                # {'money': 12, 'candy': 5, 'tissues': 75}
print(len(purse))                           # 3  -- the dictionary holds 3 key-value pairs

# --------------------------

# Comparing Lists and Dictionaries
# Similarity: Dictionaries are like "lists" except that they use "keys" instead of numbers to lookup values.

# Assignment: How we store things in lists and dictionaries, and how we look things up in them?

# creating list, adding values to list and changing (mutation) values in lists through index number (position)
# Start with an empty list, append two values, then replace the first by position.
lst = []
lst.append(3)
lst.append(127)
print(lst)      # [3, 127]
lst[0] = 5      # list is now [5, 127]

# creating dictionaries, adding values to dictionaries and changing values in dictionaries through keys (tags)
# Start with an empty dictionary, add entries by key, then overwrite one.
ddd = {}
ddd['age'] = 36
ddd['course'] = 'Python'
print(ddd)                  # {'age': 36, 'course': 'Python'}
ddd['age'] = 25             # assigning to an existing key replaces its value
print(ddd)                  # {'age': 25, 'course': 'Python'}
ddd[3] = 'numeric value'    # a number works as a key too
print(ddd)                  # {'age': 25, 'course': 'Python', 3: 'numeric value'}

# -------------------------

# Dictionary Literals (Constants) : Literal is a notation for representing a fixed value in source code.
    # - Dictionary "literals" use curly braces "{}" and have a list of "key:value" pairs.
    # a = {} and a = dict() are same things ........ i.e. {} and dict() are same things.
        # both are used to create empty dictionaries.
    # you can also create the dictionery and fill it with some data at same time in single line of code

# creating dictionary with data

# A dictionary literal: key:value pairs inside curly braces, filled in one line.
jjj = {'chuck' : 1, 'fred' : 42, 'jan' : 100}
print(jjj)                                      # {'chuck': 1, 'fred': 42, 'jan': 100}

aaa = {}                                        # empty dictionary created with the literal form
print(aaa)                                      # {}

bbb = dict()                                    # empty dictionary again, created with the dict() call
print(bbb)                                      # {}

# ----------------------

#### COUNTING ####

# Most Common Name?
# One of the common application of dictionaries: Making Histograms i.e. counting the frequency of things.
# Counting by hand: each name is a key and its value is how often it was seen.
ccc = {}
ccc['csev'] = 1                 # first sighting of a name starts its count at 1
ccc['cwen'] = 1
print(ccc)                      # {'csev': 1, 'cwen': 1}
ccc['cwen'] += 1                # seeing 'cwen' again adds 1 to its count
print(ccc)                      # {'csev': 1, 'cwen': 2}

# Process explained:
    # First you check if this neme is there in the dictionary (collection) already.
        # if its already there, you really wanna add 1 to it.
    # if you see this the first time, then you stick value 1 to it.
    # so you use the name as "key".

# --------------------------

### Dictionary Traceback ###

# its an "error" to refer a key which is not there in the dictionary.

# we can use "in" operator to ckeck if a key is there in dictionary.

# Looking up a key that is not in the dictionary raises KeyError.
# The error is caught here so the demonstration does not stop the script.
ccc = dict()
print('csew' in ccc)  # False
try:
    print(ccc['csew'])  # raises KeyError -- the key is not in the dictionary
except KeyError as err:
    print('KeyError:', err)  # show the error instead of letting the program blow up

# by using "in" operator we can avoid T.B.
# after checking if the "key" is not there in the dictionary, put it in.
# if its already there, then add 1 to the value of it.

# -------- Above situation leads to the following code. -----------------

# How to do when we see a new name.
# use of "if-else" statement to add keys into dictionary.
# This is the "histogram" code.

# Histogram with if-else: count how many times each item occurs in `names`.
counts = {}
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    if name in counts:
        counts[name] += 1       # seen before: add 1 to its count
    else:
        counts[name] = 1        # first sighting: start its count at 1
print(counts)                                   # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}

# Notes:
    # - List is the source of data, which is to be analysed.
    # - empty dictionary is created and then keys and their respective counts are added into it through "if-else" statement.

# ========================
# ========================

        ### The "get" Method for dictionaries

# "get" collapses the four lines of code into one line.
    # 4 lines means use of "if-else" statement to check and process the existance of a name in a dictionary.

# Relies on `name` and `counts` as left behind by the histogram loop above.
# Four-line form: read the count for `name`, or use 0 when the key is missing.
if name in counts:
    x = counts[name]
else:
    x = 0

# "alternatively"

x = counts.get(name, 0)             # one-line form: the count stored for `name`, or 0 if the key is missing

# ---------- Same exmple discussed above with "get" solution -----------


# Histogram again, this time with the "get" idiom instead of if-else.
counts = {}
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    # idiom: take the current count (0 when the name is new) and store it plus 1
    counts[name] = counts.get(name, 0) + 1
print(counts)

# Notes:
# 0 is set to be deault value
# in Python, we often need to calculate the frequency of data.
# so this "get-method" is quite useful in Python programming.

# =====================================
# =====================================

            # Counting Words #
            # Seeing through files instead of just strings

# Counting Pattern

# Counting pattern: read one line of text and count how often each word occurs.
counts = {}
print("Enter a line of Text:")      # heading shown before the (empty) prompt
line = input('')
words = line.split()
print('Words: ', words)             # the list produced by split()
print('Counting ...')               # heading for the result that follows
for word in words:
    counts[word] = counts.get(word, 0) + 1
print(counts)
# Further (next topic): print one "key value" pair per line instead of the raw dictionary
for key in counts:
    value = counts[key]
    print(key, value)


# Issue: full stops, letter case, etc. make the program count variants of the same word separately (e.g. 'The', 'the' and 'the.').


# -------------------------------------------
# -------------------------------------------
        ### Definite Loops and Dictionaries ###

# the ways you can loop through the dictionary: in above cases we saw how we can build dictionary through using loop.

# "for loops" can be written up for dictionaries.
    # These will go through all the keys of dictionaries and looks up the values.

# Loop over a dictionary: the loop variable takes each key in turn.
counts = dict(chuck=1, fred=42, jan=100)
for key in counts:
    value = counts[key]         # look the value up through its key
    print(key, value)
            # answer:
                    # chuck 1
                    # fred 42
                    # jan 100

# --------------------------------
# --------------------------------

        ### Retrieving lists of Keys and Values ###

# you can get a list of keys, values or items (both: key and value) from a dictionary

# following are different methods to get the desired outcome
    # - getting keys only
    # - getting values only
                # a dictionary keeps insertion order (guaranteed since Python 3.7), so if you ask
                #  for keys and values in two different commands,
                #  the two outcomes line up with each other.
    # - getting both keys and values (i.e. items)

jjj = {'chuck': 1, 'fred': 42, 'jan': 100}
print(list(jjj))            # ['chuck', 'fred', 'jan']  -- list() of a dictionary gives its keys
print(jjj.keys())           # dict_keys(['chuck', 'fred', 'jan'])
print(jjj.values())         # dict_values([1, 42, 100])

# the next command prints the keys and values together, as pairs.
print(jjj.items())          # dict_items([('chuck', 1), ('fred', 42), ('jan', 100)])
                                    # It prints like a list of pairs (it is a dict_items view, not a real list),
                                    # and it is OUR FIRST REALLY combined DATA-STRUCTURE.

                                        # each pair is a "tuple" (coming soon), e.g. -- ('chuck', 1)

# ----------------------
# ----------------------

#### BONUS: Two Iteration Variables!    ####
# Python makes this easy: each (key, value) pair is unpacked into the two variables.

# We loop through the key-value pairs in a dictionary using *two* iteration variables.
# Each iteration, the first variable is the "key" and the second variable is the corresponding "value" for the key.
# here two iteration variables and ".items()" are basically related. else you can't get desired outcome.

# Two iteration variables: .items() hands out (key, value) pairs.
jjj = dict(chuck=1, fred=42, jan=100)
for aaa, bbb in jjj.items():
    print(aaa, bbb)
print('\n')

# getting only keys: looping over the dictionary itself yields its keys
for aaa in jjj:
    print(aaa)
print('\n')

# getting only values
for bbb in jjj.values():
    print(bbb)

# ----------------------------
# ----------------------------
# ----------------------------

##### the example of code shown at very early stage of lectures.

# Count how often each word occurs in a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
with open(name) as handle:          # `with` closes the file when the loop is done
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1
print(counts)
# print the word frequencies one per line, word and count separated by tabs
for key in counts:
    print(key, '\t\t', counts[key])

# -----------------------------
# -----------------------------


# Find the most frequent word in a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
with open(name) as handle:          # `with` closes the file when the loop is done
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1      # this builds the histogram

# maximum loop: remember the word with the highest count seen so far
big_count = None                    # None means "nothing seen yet"
big_word = None
for word, count in counts.items():                  # two iteration variables: key and value
    if big_count is None or count > big_count:      # comparison is based on the frequency (value)
        big_word = word
        big_count = count
print(big_word, big_count)                # e.g. jan 352

# SubhanAllah   #

# =================================
# -------------------------- CHAPTER No. 9
# -------------------------- Dictionaries - Counting Word Frequency using a Dictionary
# =================================

# Step 1:
    # writing initial code while testing it at the same time, and finalizing it if its working properly.

# Assignment : word count

# Step 1: read the file line by line and show each line and its words.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()          # remove whitespace from the right
        print(lin)
        wds = lin.split()           # split the line into words, i.e. from string to list
        print(wds)

# ----------- # step 2,   moving further
# ----------- # step 2,   moving further

# Assignment : word count

# Step 2: print every word of the file on its own line.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()          # remove whitespace from the right
        # print(lin)
        wds = lin.split()           # split the line into words, i.e. from string to list
        # print(wds)
        for w in wds:               # visits all of the words of all of the lines
            print(w)

# ----------- # step 3,   moving further - where dictionary comes in
# ----------- # step 3,   moving further - where dictionary comes in
# ----------- # step 3,   moving further - where dictionary comes in

# Assignment : word count

# Step 3: count the words with a dictionary, tracing every update.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()                         # the counter: words are the keys, counts are the values
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            print(w)                        # word at start
            if w in di:                     # the word is already in the dictionary
                di[w] = di[w] + 1
                print('**Existing**')
            else:                           # the word appears for the first time
                di[w] = 1
                print('**New**')
            print(di[w])                    # count at the end
            print('\n')
print(di)


# ----------- # step 4,   moving further - get mechanism
# ----------- # step 4,   moving further - get mechanism
# ----------- # step 4,   moving further - get mechanism
# ----------- # step 4,   moving further - get mechanism

# Assignment : word count

# Step 4: same counter, with di.get() used to peek at the count before updating.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # di.get(w, -99) is the important part: it gives the current count,
            # or -99 when the word is not in the dictionary yet.
            print('**', w, di.get(w, -99))

            # These four lines can be done in one line of code with get().
            if w in di:
                di[w] = di[w] + 1
            else:
                di[w] = 1

            # print(w, di[w])
print(di)


# ----------- # step 5,   moving further - get mechanism - finding the count
# ----------- # step 5,   moving further - get mechanism - finding the count
# ----------- # step 5,   moving further - get mechanism - finding the count
# ----------- # step 5,   moving further - get mechanism - finding the count
# ----------- # step 5,   moving further - get mechanism - finding the count

# Assignment : word count

# Step 5: counting with get(), spelled out as old count -> new count.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # if the key is not there the count is zero
            oldcount = di.get(w, 0)         # look up the old count; use 0 when there is none
            print(w, 'old', oldcount)
            newcount = oldcount + 1         # the old count plus 1 becomes the new count
            di[w] = newcount
            print(w, 'new', newcount)

print(di)

# ----------- # step 6,   moving further - get mechanism - in one line code
# ----------- # step 6,   moving further - get mechanism - in one line code
# ----------- # step 6,   moving further - get mechanism - in one line code
# ----------- # step 6,   moving further - get mechanism - in one line code
# ----------- # step 6,   moving further - get mechanism - in one line code
# ----------- # step 6,   moving further - get mechanism - in one line code

# Assignment : word count

# Step 6: the retrieve/create/update counter in one line.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
            print(w, 'new', di[w])

print(di)

# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word
# ----------- # step 7,   moving further - finding most common word

# Assignment : word count

# Step 7: build the word counter, then find the most common word.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1

# print(di)
# once this dictionary has been printed and verified, the code above can be trusted

# now finding the most common word
# this is like a maximum-loop

largest = -1        # safe start value here: every count is at least 1, so any word beats -1
theword = None
for k, v in di.items():             # .items() gives key-value pairs, hence two iteration variables
    print(k, v)
    if v > largest:
        largest = v
        theword = k                 # capture/remember the "key" that was largest
print('Done', theword, largest)





# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner
# ----------- # step 8,   moving further - making the code cleaner

# Assignment : word count

# Step 8: the cleaned-up program -- count the words, print the most common one.
fname = input('Enter File: ')
if len(fname) < 1:
    fname = 'clown.txt'             # default file when the user just presses Enter

di = dict()
with open(fname) as hand:           # `with` closes the file when the loop is done
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1

# print(di)
# once this dictionary has been printed and verified, the code above can be trusted

# now finding the most common word
# this is like a maximum-loop

largest = -1                        # every count is at least 1, so any word beats -1
theword = None
for k, v in di.items():
    if v > largest:
        largest = v
        theword = k                 # remember the key with the largest count so far
print(theword, largest)

# ===========================================
# ===========================================
# ===========================================
# ===========================================

######## CHAPTER 10
######## TUPLES
######## 5:23:06

# ============================================
# ============================================
# ============================================
# ============================================


### Debugging Assignment ###

# code to be fixed:

# Exhibit for the debugging exercise: print the third word of every line
# that starts with 'From'. It is intentionally left unguarded.
han = open('emaildata')
for line in han:
    line = line.rstrip()    # remove trailing whitespace
    words = line.split()    # split the line into words
    # On a blank line `words` is an empty list, so words[0] raises
    # IndexError -- this is the traceback (T.B.) the exercise is about.
    if words[0] != 'From':
        continue
    print(words[2])
# --------
# now after fixing:

# Debug version: the extra prints show every line and its word list, so we
# can see exactly where the traceback (T.B.) happens.
with open('emaildata') as han:          # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()            # remove trailing whitespace
        print('LINE: ', line)           # show each line before it is processed
        words = line.split()            # split the line into words
        print('WORDS: ', words)         # show the word list to spot where the program breaks
        # Guardian pattern: a blank line gives an empty list, so words[0]
        # below would raise IndexError -- skip such lines first.
        if len(words) < 1:
            continue
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])

# Final shape of a clean code

# Clean version with the guardian: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        if len(words) < 1:          # guardian: skip blank lines (empty word list)
            continue
        if words[0] != 'From':
            continue
        print(words[2])

# ------------------ alternative approach to creating a guardian

# As an alternative to writing a guardian, we can "protect" the code: skip blank lines before they are split.

# Debug version of the "protect" approach: blank lines are skipped before
# the line is split, so words[0] is never evaluated on an empty list.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()        # remove trailing whitespace
        print('LINE: ', line)
        if line == '':              # blank line: nothing to split, skip it
            print('skipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskipskip')
            continue
        words = line.split()        # split the line into words
        print('WORDS: ', words)
        if words[0] != 'From':
            print('ignore')
            continue
        print(words[2])

# Final code
# Final "protect" version: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        if line == '':              # protecting approach: skip the empty line before splitting
            continue
        words = line.split()
        if words[0] != 'From':
            continue
        print(words[2])


# -------------- making guardian a bit stronger
# Guardian approach: making the guardian a bit stronger
# this depends on you judgement and your assumption about the readability of data.


# Stronger guardian: print(words[2]) needs at least three words, so lines
# with fewer than three words are skipped.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        # guardian a bit stronger
        if len(words) < 3:          # minimum number of words raised from 1 to 3
            continue
        if words[0] != 'From':
            continue
        print(words[2])

# -------------- making guardian in compound statement

# Guardian approach: creating code for guradian in a compound statement.
# Remamber, the order of code is really important from left to right.
# if first condition blows-up, the programme will give T.B. so be careful.
# Guardian as one compound condition: print the third word of 'From' lines.
with open('emaildata') as han:      # `with` closes the file when the loop is done
    for line in han:
        line = line.rstrip()
        words = line.split()
        # Order matters: `or` evaluates left to right and stops at the first
        # true condition, so words[0] is only read when len(words) >= 3.
        if len(words) < 3 or words[0] != 'From':
            continue
        print(words[2])

##### SUMMARY #####

# As its observed, its not always about a wrong line,
# debugging can be about adding additional code to support the code already written.
# there are multiple ways to fix the issue.
# its always flexible, and depends upon your own capability to write a cleaner, better code.
# Adding a print statement to check upon whats going on is really helpful sometimes.

# upto this point @ 13 July,2020
# POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED # # POSTED #


# ==============================
# ==============================
# ============================== CHAPTER No. 9
# ============================== Python Dictionaries.
# ============================== 4:29:00
# ==============================

# Definition
# - very powerful
# - they are like a little in memory databases
# Collection is defined as:
# a situation where we are going to have a variable like a list or dictionary.

# Non-collection is defined as:
# Most of our variables have one value in them - when we put a new value in them, the old value is overwritten.

# Lists vs Dictionary
# List:
# A linear collection of values that stay in order.
# an organized version of collection of values.
# kind of like Pringles.

# Dictionary:
# A "bag" of values, each with its own label.
# These are messier, where values are not in order.
# Everything has a "key".
# About Dictionaries:
# - Dictionaries are Python's most powerful data collection
# - Dictionaries allows us to do fast database-like operations
# - Dictionaries have different names in different languages:
# - Associative Arrays - Perl / PHP
# - Properties or Map or HashMap - Java
# - Property Bag - C# / .Net

# Associative means that there is association between key and a value.
# whereas in list, there is a position (index-position).
# - Position is less powerful, less flexible

# ----------------

# Lists "index" their entries based on the position in the list
# Dictionaries are like bags - entries are found by key, not by position
# So we "index" the things we put in a "dictionary" with a lookup-tag.

# Build a small dictionary, read one entry, update it and measure its size.
purse = {}
purse['money'] = 12
purse['candy'] = 3
purse['tissues'] = 75
print(purse)  # {'money': 12, 'candy': 3, 'tissues': 75}
print(purse['candy'])  # 3
purse['candy'] += 2  # add 2 to the value stored under 'candy'
print(purse['candy'])  # 5
print(purse)  # {'money': 12, 'candy': 5, 'tissues': 75}
print(len(purse))  # 3 -- the dictionary holds 3 key-value pairs

# --------------------------

# Comparing Lists and Dictionaries
# Similarity: Dictionaries are like "lists" except that they use "keys" instead of numbers to lookup values.

# Assignment: How we store things in lists and dictionaries, and how we look things up in them?

# creating list, adding values to list and changing (mutation) values in lists through index number (position)
# Start with an empty list, append two values, then replace the first by position.
lst = []
lst.append(3)
lst.append(127)
print(lst)  # [3, 127]
lst[0] = 5  # list is now [5, 127]

# creating dictionaries, adding values to dictionaries and changing values in dictionaries through keys (tags)
# Start with an empty dictionary, add entries by key, then overwrite one.
ddd = {}
ddd['age'] = 36
ddd['course'] = 'Python'
print(ddd)  # {'age': 36, 'course': 'Python'}
ddd['age'] = 25  # assigning to an existing key replaces its value
print(ddd)  # {'age': 25, 'course': 'Python'}
ddd[3] = 'numeric value'  # a number works as a key too
print(ddd)  # {'age': 25, 'course': 'Python', 3: 'numeric value'}

# -------------------------

# Dictionary Literals (Constants) : Literal is a notation for representing a fixed value in source code.
# - Dictionary "literals" use curly braces "{}" and have a list of "key:value" pairs.
# a = {} and a = dict() are same things ........ i.e. {} and dict() are same things.
# both are used to create empty dictionaries.
# you can also create the dictionery and fill it with some data at same time in single line of code

# creating dictionary with data

# A dictionary literal: key:value pairs inside curly braces, filled in one line.
jjj = {'chuck' : 1, 'fred' : 42, 'jan' : 100}
print(jjj) # {'chuck': 1, 'fred': 42, 'jan': 100}

aaa = {} # empty dictionary created with the literal form
print(aaa) # {}

bbb = dict() # empty dictionary again, created with the dict() call
print(bbb) # {}

# ----------------------

#### COUNTING ####

# Most Common Name?
# One of the common application of dictionaries: Making Histograms i.e. counting the frequency of things.
# Counting by hand: each name is a key and its value is how often it was seen.
ccc = {}
ccc['csev'] = 1  # first sighting of a name starts its count at 1
ccc['cwen'] = 1
print(ccc)  # {'csev': 1, 'cwen': 1}
ccc['cwen'] += 1  # seeing 'cwen' again adds 1 to its count
print(ccc)  # {'csev': 1, 'cwen': 2}

# Process explained:
# First you check if this neme is there in the dictionary (collection) already.
# if its already there, you really wanna add 1 to it.
# if you see this the first time, then you stick value 1 to it.
# so you use the name as "key".

# --------------------------

### Dictionary Traceback ###

# its an "error" to refer a key which is not there in the dictionary.

# we can use "in" operator to ckeck if a key is there in dictionary.

# Looking up a key that is not in the dictionary raises KeyError.
# The error is caught here so the demonstration does not stop the script.
ccc = dict()
print('csew' in ccc)  # False
try:
    print(ccc['csew'])  # raises KeyError -- the key is not in the dictionary
except KeyError as err:
    print('KeyError:', err)  # show the error instead of letting the program blow up

# by using "in" operator we can avoid T.B.
# after checking if the "key" is not there in the dictionary, put it in.
# if its already there, then add 1 to the value of it.

# -------- Above situation leads to the following code. -----------------

# How to do when we see a new name.
# use of "if-else" statement to add keys into dictionary.
# This is the "histogram" code.

# Histogram with if-else: count how many times each item occurs in `names`.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    if name not in counts:
        counts[name] = 1                    # first sighting: start its count at 1
    else:
        counts[name] = counts[name] + 1     # seen before: add 1 to its count
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}

# Notes:
# - List is the source of data, which is to be analysed.
# - empty dictionary is created and then keys and their respective counts are added into it through "if-else" statement.

# ========================
# ========================

### The "get" Method for dictionaries

# "get" collapses the four lines of code into one line.
# The 4 lines are the "if-else" statement that checks whether a name exists in the dictionary and handles both cases.

# Long form: read the stored count only when the key is present, otherwise use 0.
# NOTE: this snippet uses `name` and `counts` as left behind by the histogram code earlier in the file.
if name in counts:
    x = counts[name]
else:
    x = 0

# "alternatively"

# Same result in a single call: get() returns counts[name] when the key exists
# and the supplied default (0) when it does not. x is one count, not a dictionary.
x = counts.get(name, 0)

# ---------- Same example discussed above, with the "get" solution -----------


# Histogram again, this time with the get() idiom instead of if/else.
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqain', 'cwen', 1, 2, 2, 2, 1, 1, 1, 1, 3]
for name in names:
    # idiom: fetch the current count (0 when the name is new), add 1, store it back.
    # It looks dense at first but it is used all the time.
    counts[name] = counts.get(name, 0) + 1
print(counts)  # {'csev': 2, 'cwen': 2, 'zqain': 1, 1: 5, 2: 3, 3: 1}

# Notes:
# 0 is set to be the default value
# in Python, we often need to calculate the frequency of data.
# so this "get" method is quite useful in Python programming.

# =====================================
# =====================================

# Counting Words #
# Seeing through files instead of just strings

# Counting Pattern

# Count how often each word occurs in one line of text typed by the user.
counts = dict()
print("Enter a line of Text:")  # heading printed before the (empty) input prompt
line = input('')
words = line.split()  # split on whitespace: string -> list of words
print('Words: ', words)  # show the list produced by split()
print('Counting ...')  # heading for the result that follows
for word in words:
    # retrieve the current count (0 for a new word), add 1, store it back
    counts[word] = counts.get(word, 0) + 1
print(counts)
# Further (next topic): looping over a dictionary visits its keys, so this prints
# one "key value" pair per line, which is easier to read than the raw dictionary above.
for key in counts:
    print(key, counts[key])


# Issue: punctuation (e.g. a full stop) and letter case make the program count the same word as separate entries.


# -------------------------------------------
# -------------------------------------------
### Definite Loops and Dictionaries ###

# Ways to loop through a dictionary: in the cases above we saw how to build a dictionary using a loop.

# "for" loops can be written for dictionaries.
# They go through all the keys of the dictionary, and each value is looked up with its key.

# A for loop over a dictionary walks through its keys.
counts = {'chuck': 1, 'fred': 42, 'jan': 100}
for key in counts:
    # `key` is the current dictionary key; counts[key] pulls the value stored under it
    print(key, counts[key])
# output:
# chuck 1
# fred 42
# jan 100

# --------------------------------
# --------------------------------

### Retrieving lists of Keys and Values ###

# you can get a list of keys, values or items (both: key and value) from a dictionary

# following are different methods to get the desired outcome
# - getting keys only
# - getting values only
#   keys() and values() come back in matching order, even when you ask for them
#   in two different commands
#   (dictionaries keep insertion order since Python 3.7).
# - getting both keys and values (i.e. items)

jjj = {
    'chuck': 1,
    'fred': 42,
    'jan': 100,
}

# Keys: first as a plain list, then as the dictionary's own keys view.
print(list(jjj))  # ['chuck', 'fred', 'jan']
print(jjj.keys())  # dict_keys(['chuck', 'fred', 'jan'])

# Values, in the same order as the keys above.
print(jjj.values())  # dict_values([1, 42, 100])

# items() returns a dict_items view (list-like, but not a real list) holding one
# (key, value) pair per entry -- our first really combined data structure.
print(jjj.items())  # dict_items([('chuck', 1), ('fred', 42), ('jan', 100)])

# Each pair, e.g. ('chuck', 1), is a "tuple" -- coming soon.

# ----------------------
# ----------------------

#### BONUS: Two Iteration Variables! ####
# a Python convenience: the for statement can unpack each pair into two variables.

# We loop through the key-value pairs in a dictionary using *two* iteration variables.
# Each iteration, the first variable is the "key" and the second variable is the corresponding "value" for the key.
# here two iteration variables and ".items()" are basically related. else you can't get desired outcome.

jjj = {'chuck': 1, 'fred': 42, 'jan': 100}
# items() yields (key, value) pairs; the two loop variables receive one pair per iteration
for aaa, bbb in jjj.items():
    print(aaa, bbb)
print('\n')  # blank lines separating this output from the next loop's

# getting only keys
for aaa, bbb in jjj.items():
    print(aaa)
print('\n')  # blank lines separating this output from the next loop's

# getting only values
for aaa, bbb in jjj.items():
    print(bbb)

# ----------------------------
# ----------------------------
# ----------------------------

##### the example of code shown at very early stage of lectures.

# Count every whitespace-separated word in a file chosen by the user.
name = input('Enter File Name: ')
counts = dict()
# the with-block closes the file as soon as the loop is done
with open(name) as handle:
    for line in handle:
        words = line.split()
        for word in words:
            # retrieve the current count (0 for a new word), add 1, store it back
            counts[word] = counts.get(word, 0) + 1
print(counts)
# same data again, one word per line, for a more organised view of the frequencies
for key in counts:
    print(key, '\t\t', counts[key])

# -----------------------------
# -----------------------------


# Build a word histogram for a file, then report the most frequent word.
name = input('Enter File Name: ')
counts = dict()
# the with-block closes the file as soon as the loop is done
with open(name) as handle:
    for line in handle:
        words = line.split()
        for word in words:
            counts[word] = counts.get(word, 0) + 1  # this will build the histogram

# now find the item with the maximum count
big_count = None  # None means "nothing seen yet"
big_word = None
for word, count in counts.items():  # two iteration variables: key and value
    # the comparison is on the frequency (value); the first pair always wins via the None check
    if big_count is None or count > big_count:
        big_word = word
        big_count = count
print(big_word, big_count)  # e.g. jan 352 (depends on the file)

# SubhanAllah #

# =================================
# -------------------------- CHAPTER No. 9
# -------------------------- Dictionaries - Counting Word Frequency using a Dictionary
# =================================

# Step 1:
# writing initial code while testing it at the same time, and finalizing it if its working properly.

# Assignment : word count

# Step 1: read the file and look at each line and the words it splits into.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()  # remove whitespace (including the newline) from the right
        print(lin)
        wds = lin.split()  # split the line into words, i.e. from string to list
        print(wds)

# ----------- # step 2, moving further
# ----------- # step 2, moving further

# Assignment : word count

# Step 2: visit every word of every line.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()  # remove whitespace (including the newline) from the right
        # print(lin)
        wds = lin.split()  # split the line into words, i.e. from string to list
        # print(wds)
        for w in wds:  # print each word on its own line
            print(w)  # this touches every word of every line

# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in
# ----------- # step 3, moving further - where dictionary comes in

# Assignment : word count

# Step 3: count the words with a dictionary, tracing every decision.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

# The key idea: the dictionary is a set of counters, and the words are used as the index (key).
di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            print(w)  # word at start
            if w in di:
                # the word is already in the dictionary: add one to its count
                di[w] = di[w] + 1
                print('**Existing**')
            else:
                # the word appears for the first time: start its count at 1
                di[w] = 1
                print('**New**')
            print(di[w])  # count at the end
            print('\n')  # blank lines between the traces of consecutive words
print(di)


# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism
# ----------- # step 4, moving further - get mechanism

# Assignment : word count

# Step 4: watch what get() returns while still counting with if/else.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # di.get(w, -99) is the important part here: it gives the stored count,
            # or the default -99 when w has not been counted yet.
            print('**', w, di.get(w, -99))

            # These four lines do the counting ...
            if w in di:
                di[w] = di[w] + 1
            else:
                di[w] = 1

            # ... alternatively the same job can be done in one line:
            # di[w] = di.get(w, 0) + 1

            # print(w, di[w])
print(di)


# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count
# ----------- # step 5, moving further - get mechanism - finding the count

# Assignment : word count

# Step 5: count with get(), spelled out as old count -> new count.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # look up the count stored so far; if the key is not there the count is zero
            oldcount = di.get(w, 0)
            print(w, 'old', oldcount)
            # one more occurrence: the old count plus 1 becomes the new count
            newcount = oldcount + 1
            di[w] = newcount
            print(w, 'new', newcount)

print(di)

# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code
# ----------- # step 6, moving further - get mechanism - in one line code

# Assignment : word count

# Step 6: the same counting collapsed into the one-line get() idiom.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        # print(lin)
        wds = lin.split()
        # print(wds)
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1
            print(w, 'new', di[w])

print(di)

# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word
# ----------- # step 7, moving further - finding most common word

# Assignment : word count

# Step 7: build the histogram, then find the most common word (with tracing).
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1

# print(di)
# once this dictionary has been printed and verified, the code above can be trusted

# now finding the most common word
# this is like a maximum-loop

# -1 is not a great starting assumption in general, but every count here is a
# positive number, so the first word seen is always larger.
largest = -1
theword = None
# ".items()" is a method of all dictionaries that gives key-value pairs,
# which is why two iteration variables are needed.
for k, v in di.items():
    print(k, v)
    if v > largest:
        largest = v
        theword = k  # capture/remember the "key" that was largest
print('Done', theword, largest)





# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner
# ----------- # step 8, moving further - making the code cleaner

# Assignment : word count

# Step 8: the cleaned-up version -- histogram, then the most common word.
fname = input('Enter File: ')
if len(fname) < 1:
    # pressing Enter without typing a name falls back to the sample file
    fname = 'clown.txt'

di = dict()
# the with-block closes the file as soon as the loop is done
with open(fname) as hand:
    for lin in hand:
        lin = lin.rstrip()
        wds = lin.split()
        for w in wds:
            # idiom: retrieve/create/update counter
            di[w] = di.get(w, 0) + 1

# print(di)
# once this dictionary has been printed and verified, the code above can be trusted

# now finding the most common word
# this is like a maximum-loop

largest = -1  # every real count is at least 1, so the first word always beats this
theword = None
for k, v in di.items():
    if v > largest:
        # new maximum: remember both the count and the word it belongs to
        largest = v
        theword = k
print(theword, largest)

# ===========================================
# ===========================================
# ===========================================
# ===========================================

######## CHAPTER 10
######## TUPLES
######## 5:23:06

# ============================================
# ============================================
# ============================================
# ============================================








# Comments