In [17]:
## 9.1

import requests
from bs4 import BeautifulSoup

website_content = requests.get("http://codingsocialscience.org/").text

page = BeautifulSoup ( website_content , 'html.parser' )

for list in page.find_all('li'):
    print( list )
<li class="nav-item">
<a class="nav-link page-scroll" href=".">HOME <span class="sr-only">(current)</span></a>
</li>
<li class="nav-item">
<a class="nav-link page-scroll" href="/book/r1/">Review book now</a>
</li>
<li>The blended approach used in this book reflects my teaching and situates computational methods in social research, providing both ideas on research design and hands-on tools to implement such research.</li>
<li>It covers both top-level research design and more detailed approaches on how to implement and conduct research using computational methods.</li>
<li>By exploring some key traditions in computer science, it helps readers to understand computational approaches and problems from a social science perspective.</li>
<li>Situate computational social science research to fully appreciate its interdisciplinary nature, and understand the core challenges and benefits that computational methods provide for social sciences.</li>
<li>Understand how computational processes work through improving their computational thinking. They need a short introduction to programming to help "translate" a research problem into an algorithm, and to understand how algorithms work.</li>
<li>Gain familiarity with different method ‘families’ (data science, network analysis, simulation models, constructive work) to help understand the range of application domains and possibilities of computational methods.</li>
<li>Situating Computational Social Science</li>
<li>Programming and computational thinking</li>
<li>Algorithmic data analysis</li>
<li>Network analysis</li>
<li>Simulations and complex systems</li>
<li>Constructing interactive systems</li>
<li>Data structures</li>
<li>Best practices for software development</li>
<li>Research ethics adn Computational Social Science</li>
<li>Validity, reliability and computational social sciences</li>
<li>Integrating Computational Methods in a Social Science Research</li>
<li><a href="https://opetus.mante.li/computationalsocialscience/">Introduction to computational social science</a></li>
<li><a href="https://opetus.mante.li/programming/">Programming for social scientists</a></li>
<li><a href="https://opetus.mante.li/datascience/">Data Science for social scientists</a></li>
<li>Politics of Human-Computer Interaction.</li>
In [16]:
## 9.2

import requests
from bs4 import BeautifulSoup

website_content = requests.get("https://uk.sagepub.com/").text

page = BeautifulSoup ( website_content , 'html.parser' )

image_count = 0

for image in page.find_all('img'):
    image_count = image_count + 1
    
print( image_count )
5
In [18]:
## 9.3

import requests
from bs4 import BeautifulSoup

website_content = requests.get("https://www.helsinki.fi/").text
website_content = website_content.lower()

search_words = [ 'Twitter', 'Facebook', 'YouTube' ]

for search_word in search_words:
    if search_word.lower() in website_content:
        print("Found", search_word)
Found Twitter
Found Facebook
Found YouTube
In [21]:
## 9.4

import requests
from bs4 import BeautifulSoup

universities = ['https://www.helsinki.fi', 'https://www.aalto.fi']
results = {}

search_words = [ 'Twitter', 'Facebook', 'YouTube' ]

for university in universities:
    website_content = requests.get( university ).text
    website_content = website_content.lower()
    
    res = {} ## this is for storing service, count-pairs
    for search_word in search_words:
        res[ search_word ] = website_content.count( search_word.lower() )
        
    results[ university ] = res
    
print( results )
{'https://www.helsinki.fi': {'Twitter': 6, 'Facebook': 4, 'YouTube': 3}, 'https://www.aalto.fi': {'Twitter': 10, 'Facebook': 5, 'YouTube': 5}}
In [35]:
## 9.5

if False: ## automated testing fails, so remove for now

    import requests
    from bs4 import BeautifulSoup

    start_url = "http://codingsocialscience.org/"

    start_page = requests.get( start_url ).text
    start_page = BeautifulSoup ( start_page , 'html.parser' )

    for link in start_page.find_all('a'):
        link = link['href']

        ## there are both internal and external links, for simplicity let's identify external links as those starting with http

        if link.startswith('http'):
            print( start_url + "-" + link )
            followup = requests.get( link ).text
            followup = BeautifulSoup( followup, 'html.parser' )

            for followuplink in followup.find_all('a'):
                followuplink = followuplink['href']
                if followuplink.startswith('http'):
                    print( link + "-" + followuplink )
http://codingsocialscience.org/-https://www.helsinki.fi/en/faculty-of-social-sciences/centre-for-social-data-science-csds
<a class="visually-hidden focusable" href="#main-content">
      Skip to main content
  </a>
<a id="main-content" tabindex="-1"></a>
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-35-e2f2bb5ed058> in <module>
     19         for followuplink in followup.find_all('a'):
     20             print(followuplink)
---> 21             followuplink = followuplink['href']
     22             if followuplink.startswith('http'):
     23                 print( link + "-" + followuplink )

/usr/local/lib/python3.9/site-packages/bs4/element.py in __getitem__(self, key)
   1404         """tag[key] returns the value of the 'key' attribute for the Tag,
   1405         and throws an exception if it's not there."""
-> 1406         return self.attrs[key]
   1407 
   1408     def __iter__(self):

KeyError: 'href'
In [44]:
## 9.6

import requests
import json

url = 'https://data.police.uk/api/crimes-street/all-crime?lat=51.5073&lng=-0.171505' ## latest month is shown by default, see documentation
data = requests.get( url ).text ## could just be .json() for simplicyt, but doing here step by step
data = json.loads( data )

categories = {} ## this is a megacollector

for entry in data:
    
    category = entry['category']
    
    if category not in categories:
        categories[ category ] = 0
        
    categories[ category ] = categories[ category ] + 1
    

print( categories )
{'anti-social-behaviour': 237, 'bicycle-theft': 29, 'burglary': 98, 'criminal-damage-arson': 48, 'drugs': 56, 'other-theft': 400, 'possession-of-weapons': 4, 'public-order': 102, 'robbery': 74, 'shoplifting': 86, 'theft-from-the-person': 247, 'vehicle-crime': 116, 'violent-crime': 258, 'other-crime': 8}
In [53]:
## 9.7

import requests
import json

url = 'http://api.worldbank.org/v2/country/fi?format=json'
data = requests.get( url ).text ## could just be .json() for simplicyt, but doing here step by step
data = json.loads( data )

print( data[1][0]['capitalCity'] )
Helsinki
In [61]:
## 9.8

import requests
import json

adjacents = ['se', 'no', 'ru', 'ee']

for country in adjacents:
    url = 'http://api.worldbank.org/v2/country/' + country + '?format=json'
    data = requests.get( url ).text ## could just be .json() for simplicy, but doing here step by step
    data = json.loads( data )

    print( data[1][0]['capitalCity'] )
Stockholm
Oslo
Moscow
Tallinn
In [3]:
## 9.9

import requests
import json

url = 'http://api.worldbank.org/v2/country/all?format=json'
data = requests.get( url ).text ## could just be .json() for simplicy, but doing here step by step
data = json.loads( data )

## the book does not cover pagination, so let's this is not a full result

data = data[1] ## index 0 is for pagination details

for country in data:
    print( country['latitude'], country['longitude'], country['incomeLevel']['value'] )
12.5167 -70.0167 High income
  Aggregates
34.5228 69.1761 Low income
  Aggregates
  Aggregates
-8.81155 13.242 Lower middle income
41.3317 19.8172 Upper middle income
42.5075 1.5218 High income
  Aggregates
24.4764 54.3705 High income
-34.6118 -58.4173 Upper middle income
40.1596 44.509 Upper middle income
-14.2846 -170.691 Upper middle income
17.1175 -61.8456 High income
-35.282 149.129 High income
48.2201 16.3798 High income
40.3834 49.8932 Upper middle income
-3.3784 29.3639 Low income
  Aggregates
  Aggregates
50.8371 4.36761 High income
6.4779 2.6323 Lower middle income
12.3605 -1.53395 Low income
23.7055 90.4113 Lower middle income
42.7105 23.3238 Upper middle income
  Aggregates
26.1921 50.5354 High income
25.0661 -77.339 High income
43.8607 18.4214 Upper middle income
  Aggregates
53.9678 27.5766 Upper middle income
17.2534 -88.7713 Upper middle income
  Aggregates
32.3293 -64.706 High income
-13.9908 -66.1936 Lower middle income
-15.7801 -47.9292 Upper middle income
13.0935 -59.6105 High income
4.94199 114.946 High income
  Aggregates
27.5768 89.6177 Lower middle income
-24.6544 25.9201 Upper middle income
  Aggregates
5.63056 21.6407 Low income
45.4215 -75.6919 High income
  Aggregates
  Aggregates
  Aggregates
46.948 7.44821 High income
  High income
-33.475 -70.6475 High income
In [7]:
## 9.10

example = '127.0.0.1 - - [10/Nov/2020:08:43:18 +0200] "GET / HTTP/1.1" 200 4471 "-" "Mozilla/5.0"'
example = example.split('[')[1] ## everything to right from [
example = example.split(']')[0] ## everything to left from ]
## 10/Nov/2020:08:43:18 +0200 # you can print it out here
## print( example )
example = example.split(':')[0] ## everything left from :
date = example.split('/')
day = date[0]
month = date[1]
year = date[2]

print( year, month, day )
2020 Nov 10
In [8]:
## 9.11

from PyPDF2 import PdfReader

all_text = ''

reader = PdfReader('alice.pdf')
number_of_pages = len(reader.pages)


for i in range( number_of_pages ):
    page = reader.pages[i]
    text = page.extract_text()
    all_text = all_text + text
    
print( all_text )
the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink 
eyes ran close by her.  
There was nothing so VERY remarkable in that; nor did Alice think it so VERY much 
out of the way to he ar the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she 
thought it over afterwards, it occurred to her that she ought to have wondered at this, but at 
the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT 
OF ITS WAISTCOAT -POCKET, and looked at it, and then hurried on, Alice started to her 
feet, for it flashed across her mind that she had never before seen a rabbit with either a 
waistcoat -pocket, or a watch to take out of it, and burning with curiosity, she ran across the 
field after it, and fortunately was just in time to see it pop down a large rabbit -hole under 
the hedge.  
In another moment down went Alice after it, never once considering how in the world 
she was to get out again.  
The rabbit -hole went str aight on like a tunnel for some way, and then dipped suddenly 
down, so suddenly that Alice had not a moment to think about stopping herself before she 
found herself falling down a very deep well.  
Either the well was very deep, or she fell very slowly, for  she had plenty of time as she 
went down to look about her and to wonder what was going to happen next. First, she tried 
to look down and make out what she was coming to, but it was too dark to see anything; 
then she looked at the sides of the well, and no ticed that they were filled with cupboards 
and book -shelves; here and there she saw maps and pictures hung upon pegs. She took 
down a jar from one of the shelves as she passed; it was labelled 'ORANGE 
MARMALADE', but to her great disappointment it was empt y: she did not like to drop the 
jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell 
past it.  
'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling 
down stairs! How brave th ey'll all think me at home! Why, I wouldn't say anything about it, 
even if I fell off the top of the house!' (Which was very likely true.)  
Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles 
I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the 
earth. Let me see: that would be four thousand miles down, I think —' (for, you see, Alice 
had learnt several things of this sort in her lessons in the schoolroom, and though this was 
not a VERY goo d opportunity for showing off her knowledge, as there was no one to listen 
to her, still it was good practice to say it over) ' —yes, that's about the right distance —but 
then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude  
was, or Longitude either, but thought they were nice grand words to say.)  
Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny 
it'll seem to come out among the people that walk with their heads downward! The 
Antipathies , I think —' (she was rather glad there WAS no one listening, this time, as it 
didn't sound at all the right word) ' —but I shall have to ask them what the name of the 
country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to 
curtsey as she spoke —fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll 
never do to ask: perhaps I shall see it written up somewhere.'  
Down, down, down. There was nothing else to do, so Alice soon began talking again. 
'Dinah'll miss me very much to -night, I should think!' (Dinah was the cat.) 'I hope they'll 
remember her saucer of milk at tea -time. Dinah my dear! I wish you were down here with 
me! Th ere are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a 
mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather 
sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats ? Do cats eat 
bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, 
it didn't much matter which way she put it. She felt that she was dozing off, and had just 
begun to dream that she was walking hand in hand with D inah, and saying to her very 
earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! 
thump! down she came upon a heap of sticks and dry leaves, and the fall was over.  
Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, 
but it was all dark overhead; before her was another long passage, and the White Rabbit was 
still in sight, hurrying down it. There was not a moment to be lost: away wen t Alice like the 
wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, 
how late it's getting!' She was close behind it when she turned the corner, but the Rabbit 
was no longer to be seen: she found herself in a long, l ow hall, which was lit up by a row of 
lamps hanging from the roof.  
There were doors all round the hall, but they were all locked; and when Alice had been 
all the way down one side and up the other, trying every door, she walked sadly down the 
middle, wond ering how she was ever to get out again.  
Suddenly she came upon a little three -legged table, all made of solid glass; there was 
nothing on it except a tiny golden key, and Alice's first thought was that it might belong to 
one of the doors of the hall; but , alas! either the locks were too large, or the key was too 
small, but at any rate it would not open any of them. However, on the second time round, 
she came upon a low curtain she had not noticed before, and behind it was a little door 
about fifteen inche s high: she tried the little golden key in the lock, and to her great delight 
it fitted!  
Alice opened the door and found that it led into a small passage, not much larger than a 
rat-hole: she knelt down and looked along the passage into the loveliest gard en you ever 
saw. How she longed to get out of that dark hall, and wander about among those beds of 
bright flowers and those cool fountains, but she could not even get her head through the 
doorway; 'and even if my head would go through,' thought poor Alice,  'it would be of very 
little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I 
could, if I only know how to begin.' For, you see, so many out -of-the-way things had 
happened lately, that Alice had begun to think that very few things indeed were really 
impossible.  
There seemed to be no use in waiting by the little door, so she went back to the table, half 
hoping she might find another key on it, or at any rate a book of rules for shutting people up 
like telescopes: this tim e she found a little bottle on it, ('which certainly was not here 
In [9]:
## 9.12

from docx import Document

all_text = ''

document = Document('alice.docx')

for paragraph in document.paragraphs:
    all_text = all_text + paragraph.text
    
print( all_text )
the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge.In another moment down went Alice after it, never once considering how in the world she was to get out again.The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well.Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.)Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you thinkyou could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.'Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over.Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof.There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again.Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted!Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only know how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible.There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here
In [10]:
## 9.13

import requests
from bs4 import BeautifulSoup

universities = ['https://www.helsinki.fi', 'https://www.aalto.fi']
results = {}

search_words = [ 'Twitter', 'Facebook', 'YouTube' ]

for university in universities:
    website_content = requests.get( university ).text
    website_content = website_content.lower()
    
    res = {} ## this is for storing service, count-pairs
    for search_word in search_words:
        res[ search_word ] = website_content.count( search_word.lower() )
        
    results[ university ] = res
    
import json

json.dump( results, open("universities.json", "w") )
In [11]:
## 9.14

import json

data = json.load( open("universities.json") )

for university, results in data.items():
    counts = 0
    for service, count in results.items():
        counts = counts + count
    print( university, counts )
https://www.helsinki.fi 13
https://www.aalto.fi 20
In [17]:
## 9.15

import json

## this could also be a dictionary in dictionary
northwest = {}
northeast = {}
southwest = {}
southeast = {}

data = json.load( open("countries.json") )

for country in data:
    income = country['income']
    if country['lat'] > 0:
        if country['long'] > 0:
            if income not in northeast:
                northeast[ income ] = 0
            northeast[ income ] = northeast[ income ] + 1
        else:
            if income not in northwest:
                northwest[ income ] = 0
            northwest[ income ] = northwest[ income ] + 1
    if country['lat'] < 0:
        if country['long'] > 0:
            if income not in southeast:
                southeast[ income ] = 0
            southeast[ income ] = southeast[ income ] + 1
        else:
            if income not in southwest:
                southwest[ income ] = 0
            southwest[ income ] = southwest[ income ] + 1
            
print( northwest, northeast, southwest, southeast )
{'High income': 6, 'Low income': 1, 'Upper middle income': 1} {'Low income': 2, 'Upper middle income': 6, 'High income': 7, 'Lower middle income': 3} {'Upper middle income': 3, 'Lower middle income': 1, 'High income': 1} {'Lower middle income': 1, 'High income': 1, 'Low income': 1, 'Upper middle income': 1}
In [26]:
## 9.16

import pandas

url = 'https://data.police.uk/api/crimes-street/all-crime?lat=51.5073&lng=-0.171505' ## latest month is shown by default, see documentation
dataframe = pandas.read_json(url)

## note: it is also possible to manually construct the dataframe if you wish and append, remove rows from it. this is a simple example to make it clear why one might want to use tools such as pandas

print( dataframe['category'].value_counts() )
other-theft              400
violent-crime            258
theft-from-the-person    247
anti-social-behaviour    237
vehicle-crime            116
public-order             102
burglary                  98
shoplifting               86
robbery                   74
drugs                     56
criminal-damage-arson     48
bicycle-theft             29
other-crime                8
possession-of-weapons      4
Name: category, dtype: int64
In [ ]: