Program to Implement Meta Data in Python Assignment Solution.

Instructions

Objective
Write a Python program that reads arXiv article metadata from a file and answers queries about the articles and their authors.
Requirements and Specifications

program to implement meta data in python
program to implement meta data in python 1
Source Code
A3
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
from io import StringIO
from typing import Any, Dict
import unittest
import arxiv_functions as arxiv
import checker_generic as checker
# Name of the module under test; the style check runs on MODULENAME + '.py'.
MODULENAME = 'arxiv_functions'
# Configuration file passed to checker.run_pyta.
PYTA_CONFIG = 'a3_pyta.json'

# Width and fill character of the banner lines printed by this script.
TARGET_LEN = 79
SEP = '='

# Name -> expected value of every constant that the module under test
# must still expose with its original value.
CONSTANTS = {
    'ID': 'identifier',
    'TITLE': 'title',
    'CREATED': 'created',
    'MODIFIED': 'modified',
    'AUTHORS': 'authors',
    'ABSTRACT': 'abstract',
    'END': 'END',
}
# Sample metadata file with two article records. Each record is laid out as:
#   identifier
#   title
#   created date
#   modified date (a blank line when the article was never modified)
#   one 'Last,First' line per author
#   a blank separator line
#   one or more abstract lines
#   END
# The blank lines are significant: DATA_DICT expects an empty modified date
# for '008', and the reader uses the blank line after the authors to find
# where the abstract starts.
DATA_FILE = """008
Intro to CS is the best course ever
2021-09-01

Ponce,Marcelo
Tafliovich,Anya Y.

We present clear evidence that Introduction to
Computer Science is the best course.
END
827
University of Toronto is the best university
2021-08-20
2021-10-02
Ponce,Marcelo
Bretscher,Anna
Tafliovich,Anya Y.

We show a formal proof that the University of
Toronto is the best university.
END
"""
# The two sample articles as a dict keyed by article identifier; each value
# maps the article's field names to that article's data.
DATA_DICT = {
    '008': {
        'identifier': '008',
        'title': 'Intro to CS is the best course ever',
        'created': '2021-09-01',
        'modified': '',
        'authors': [
            ('Ponce', 'Marcelo'),
            ('Tafliovich', 'Anya Y.'),
        ],
        'abstract': ('We present clear evidence that Introduction to\n'
                     'Computer Science is the best course.'),
    },
    '827': {
        'identifier': '827',
        'title': 'University of Toronto is the best university',
        'created': '2021-08-20',
        'modified': '2021-10-02',
        'authors': [
            ('Bretscher', 'Anna'),
            ('Ponce', 'Marcelo'),
            ('Tafliovich', 'Anya Y.'),
        ],
        'abstract': ('We show a formal proof that the University of\n'
                     'Toronto is the best university.'),
    },
}
class CheckTest(unittest.TestCase):
    """Sanity checks (NOT a full tester!) for the assignment functions."""

    def test_contains_keyword(self) -> None:
        """contains_keyword should return a list of str."""
        call_args = [DATA_DICT, 'is']
        self._check_list_of_type(arxiv.contains_keyword, call_args, str)

    def test_created_in_year(self) -> None:
        """created_in_year should return a bool."""
        call_args = [DATA_DICT, '008', 2021]
        self._check_simple_type(arxiv.created_in_year, call_args, bool)

    def test_read_arxiv_file(self) -> None:
        """read_arxiv_file should return a dict of str keys to article dicts
        whose values have the expected types.
        """
        print('\nChecking read_arxiv_file...')
        outcome = checker.returns_dict_of(
            arxiv.read_arxiv_file, [StringIO(DATA_FILE)], str, dict)
        self.assertTrue(outcome[0], outcome[1])

        valid_keys = {'identifier', 'title', 'created', 'modified',
                      'authors', 'abstract'}
        template = 'Value corresponding to key "{}" should be a {}.'
        for record in outcome[1].values():
            # The authors entry must be a list of (str, str) tuples ...
            authors = record['authors']
            authors_ok = isinstance(authors, list) and _all_names(authors)
            self.assertTrue(authors_ok,
                            template.format('authors', 'list of names'))
            # ... and every other entry must be a str or None.
            for field in valid_keys - {'authors'}:
                value = record[field]
                self.assertTrue(value is None or isinstance(value, str),
                                template.format(field, 'str or None'))
        print(' check complete')

    def test_make_author_to_article(self) -> None:
        """make_author_to_articles should return a dict of names to lists of
        article IDs.
        """
        print('\nChecking make_author_to_articles...')
        outcome = checker.type_check_simple(
            arxiv.make_author_to_articles, [DATA_DICT], dict)
        self.assertTrue(outcome[0], outcome[1])

        msg = ('make_author_to_articles should return a dict in which\n'
               'keys are names (Tuple[str, str]) and values are lists of\n'
               'article IDs (List[str]).')
        for name, article_ids in outcome[1].items():
            well_formed = (_is_name(name) and
                           isinstance(article_ids, list) and
                           all(isinstance(elt, str) for elt in article_ids))
            self.assertTrue(well_formed, msg)
        print(' check complete')

    def test_get_coauthors(self) -> None:
        """get_coauthors should return a list of names."""
        call_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.get_coauthors, call_args)

    def test_get_most_published_authors(self) -> None:
        """get_most_published_authors should return a list of names."""
        call_args = [DATA_DICT]
        self._check_list_of_names(arxiv.get_most_published_authors, call_args)

    def test_suggest_collaborators(self) -> None:
        """suggest_collaborators should return a list of names."""
        call_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.suggest_collaborators, call_args)

    def test_has_prolific_authors(self) -> None:
        """has_prolific_authors should return a bool."""
        by_author = {
            ('Ponce', 'Marcelo'): ['008', '827'],
            ('Tafliovich', 'Anya Y.'): ['008', '827'],
            ('Bretscher', 'Anna'): ['827'],
        }
        call_args = [by_author, DATA_DICT['008'], 2]
        self._check_simple_type(arxiv.has_prolific_authors, call_args, bool)

    def test_keep_prolific_authors(self) -> None:
        """keep_prolific_authors should return None (nothing else is
        checked here).
        """
        call_args = [DATA_DICT, 2]
        self._check_simple_type(arxiv.keep_prolific_authors, call_args,
                                type(None))

    def test_check_constants(self) -> None:
        """The module's constants should still have their original values."""
        print('\nChecking that constants refer to their original values')
        self._check_constants(CONSTANTS, arxiv)
        print(' check complete')

    def _check_list_of_names(self, func: callable, args: list) -> None:
        """Check that func(*args) returns a list of names, i.e. a list of
        tuples of two strs. Print the progress and outcome of the check.
        """
        print('\nChecking {}...'.format(func.__name__))
        outcome = checker.type_check_simple(func, args, list)
        self.assertTrue(outcome[0], outcome[1])

        template = ('{} should return a list of tuples of two strs:\n'
                    '[(lastname1, firstname1), (lastname2, firstname2), ...]\n'
                    'Test your function thoroughly!')
        self.assertTrue(_all_names(outcome[1]),
                        template.format(func.__name__))
        print(' check complete')

    def _check_simple_type(self, func: callable, args: list,
                           expected: type) -> None:
        """Check that func(*args) returns a value of type expected. Print the
        progress and outcome of the check.
        """
        print('\nChecking {}...'.format(func.__name__))
        outcome = checker.type_check_simple(func, args, expected)
        self.assertTrue(outcome[0], outcome[1])
        print(' check complete')

    def _check_list_of_type(self, func: callable, args: list,
                            typ: type) -> None:
        """Check that func(*args) returns a list whose values all have type
        typ. Print the progress and outcome of the check.
        """
        print('\nChecking {}...'.format(func.__name__))
        outcome = checker.returns_list_of(func, args, typ)
        self.assertTrue(outcome[0], outcome[1])
        print(' check complete')

    def _check_constants(self, name2value: Dict[str, Any], mod: Any) -> None:
        """Check that every name in name2value is an attribute of module mod
        whose value equals the one recorded in name2value.
        """
        for name in name2value:
            expected = name2value[name]
            actual = getattr(mod, name)
            msg = 'The value of {} should be {} but is {}.'.format(
                name, expected, actual)
            self.assertEqual(expected, actual, msg)
def _all_names(obj: Any) -> bool:
 """Return whether every item in obj is of type constants.NameType."""
 return all(_is_name(name) for name in obj)
def _is_name(obj: Any) -> bool:
 """Return whether obj is a name, i.e. a Tuple[str, str]."""
 return (isinstance(obj, tuple) and len(obj) == 2 and
 isinstance(obj[0], str) and isinstance(obj[1], str))
# Runs on import/execution of this file: the I/O check first, then the
# style check, then the unittest-based type-contract checks above.
checker.ensure_no_io(MODULENAME)

print(SEP * TARGET_LEN)

# Phase 1: coding style.
print(' Start: checking coding style '.center(TARGET_LEN, SEP))
checker.run_pyta('{}.py'.format(MODULENAME), PYTA_CONFIG)
print(' End checking coding style '.center(TARGET_LEN, SEP))

# Phase 2: type contracts. exit=False keeps the interpreter alive so the
# closing summary below is still printed.
print(' Start: checking type contracts '.center(TARGET_LEN, SEP))
unittest.main(exit=False)
print(' End checking type contracts '.center(TARGET_LEN, SEP))

print('\nScroll up to see ALL RESULTS:',
      ' - checking coding style',
      ' - checking type contract\n',
      sep='\n')
arxiv_functions.py
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
# importing copy for use in the keep_prolific_authors docstring
# you do not need to use it anywhere else
import copy
from typing import Dict, List, TextIO
from constants import (ID, TITLE, CREATED, MODIFIED, AUTHORS, ABSTRACT, END,
 NameType, ArticleValueType, ArticleType, ArxivType)
# Sample data used by the docstring examples and by the __main__ block:
# article identifier -> article.
EXAMPLE_ARXIV = {
    '031': {
        ID: '031',
        TITLE: 'Calculus is the Best Course Ever',
        CREATED: '',
        MODIFIED: '2021-09-02',
        AUTHORS: [('Breuss', 'Nataliya')],
        ABSTRACT: 'We discuss the reasons why Calculus is the best course.',
    },
    '067': {
        ID: '067',
        TITLE: 'Discrete Mathematics is the Best Course Ever',
        CREATED: '2021-09-02',
        MODIFIED: '2021-10-01',
        AUTHORS: [('Pancer', 'Richard'), ('Bretscher', 'Anna')],
        ABSTRACT: ('We explain why Discrete Mathematics is the best '
                   'course of all times.'),
    },
    '827': {
        ID: '827',
        TITLE: 'University of Toronto is the Best University',
        CREATED: '2021-08-20',
        MODIFIED: '2021-10-02',
        AUTHORS: [
            ('Ponce', 'Marcelo'),
            ('Bretscher', 'Anna'),
            ('Tafliovich', 'Anya Y.'),
        ],
        ABSTRACT: ('We show a formal proof that the University of\n'
                   'Toronto is the best university.'),
    },
    '008': {
        ID: '008',
        TITLE: 'Intro to CS is the Best Course Ever',
        CREATED: '2021-09-01',
        MODIFIED: '',
        AUTHORS: [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
        ABSTRACT: ('We present clear evidence that Introduction to\n'
                   'Computer Science is the best course.'),
    },
    '042': {
        ID: '042',
        TITLE: '',
        CREATED: '2021-05-04',
        MODIFIED: '2021-05-05',
        AUTHORS: [],
        ABSTRACT: ('This is a strange article with no title\n'
                   'and no authors.\n'
                   '\n'
                   'It also has a blank line in its abstract!'),
    },
}
# Author name -> sorted IDs of the EXAMPLE_ARXIV articles by that author.
EXAMPLE_BY_AUTHOR = {
    ('Ponce', 'Marcelo'): ['008', '827'],
    ('Tafliovich', 'Anya Y.'): ['008', '827'],
    ('Bretscher', 'Anna'): ['067', '827'],
    ('Breuss', 'Nataliya'): ['031'],
    ('Pancer', 'Richard'): ['067'],
}
################################################
## Task 1
################################################
# a helper to remove non-alphabetic characters
def clean_word(word: str) -> str:
    """Return a lowercase copy of word that keeps only its alphabetic
    characters.

    Precondition: word contains no whitespace

    >>> clean_word('Hello!!!')
    'hello'
    >>> clean_word('12cat.dog?')
    'catdog'
    >>> clean_word("DON'T")
    'dont'
    """
    letters = [ch.lower() for ch in word if ch.isalpha()]
    return ''.join(letters)
# Add your other Task 1 functions here
def created_in_year(ArxivType, id: str, year: int) -> bool:
    """Return True if and only if the article with identifier id in
    ArxivType (a dict of articles keyed by identifier) was created in year.

    Return False when no article has identifier id, and when the article's
    creation date is blank or does not start with a year number.

    >>> created_in_year(EXAMPLE_ARXIV, '008', 2021)
    True
    >>> created_in_year(EXAMPLE_ARXIV, '008', 2020)
    False
    >>> created_in_year(EXAMPLE_ARXIV, '031', 2021)
    False
    >>> created_in_year(EXAMPLE_ARXIV, '999', 2021)
    False
    """
    article = ArxivType.get(id)
    if article is None:
        # Unknown identifier: answer with a bool, never None.
        return False

    # Dates look like 'YYYY-MM-DD'; the year is the part before the first '-'.
    year_text = article[CREATED].split('-')[0]
    try:
        return int(year_text) == year
    except ValueError:
        # A blank creation date (article '031' in EXAMPLE_ARXIV) has no year.
        return False
def contains_keyword(id_to_article: ArxivType, word: str) -> List[str]:
    """Return the IDs, in sorted order, of the articles in id_to_article
    whose title or abstract contains word.

    Both word and the words of the text are compared after clean_word, so
    the match ignores case and non-alphabetic characters but is a whole-word
    match: 'his' does not match 'This'. A word with no alphabetic characters
    matches nothing.

    >>> contains_keyword(EXAMPLE_ARXIV, 'Discrete!')
    ['067']
    >>> contains_keyword(EXAMPLE_ARXIV, 'his')
    []
    >>> contains_keyword(EXAMPLE_ARXIV, '2021')
    []
    """
    keyword = clean_word(word)
    if not keyword:
        # An empty keyword would otherwise "match" every text.
        return []

    matches = []
    for article_id, article in id_to_article.items():
        text = article[TITLE] + ' ' + article[ABSTRACT]
        # str.split() with no argument splits on any whitespace, which
        # satisfies clean_word's no-whitespace precondition.
        if any(clean_word(token) == keyword for token in text.split()):
            matches.append(article_id)
    matches.sort()
    return matches
################################################
## Task 2
################################################
def extract_article_data(data: List[str]) -> ArticleType:
    """Return the article described by data, the lines of one record.

    data holds the lines of a single record, without line endings and
    without the terminating END line, in this order: identifier, title,
    created date, modified date ('' if none), zero or more 'Last,First'
    author lines, one blank line, then the lines of the abstract.

    Authors are kept in the order in which they are listed. The abstract
    lines are joined with newline characters, so a multi-line abstract
    (including blank lines inside it) is preserved.

    >>> lines = ['008', 'A title', '2021-09-01', '',
    ...          'Ponce,Marcelo', 'Tafliovich,Anya Y.', '',
    ...          'First line', '', 'third line']
    >>> article = extract_article_data(lines)
    >>> article[AUTHORS]
    [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> article[MODIFIED]
    ''
    >>> article[ABSTRACT].splitlines()
    ['First line', '', 'third line']
    """
    article_id = data[0]
    title = data[1]
    creation_date = data[2]
    modified_date = data[3]

    # Author lines start at index 4 and run up to the blank separator line.
    authors = []
    pos = 4
    while pos < len(data) and data[pos] != '':
        # Split on the first comma only so a name is always a 2-tuple.
        last_name, _, first_name = data[pos].partition(',')
        authors.append((last_name, first_name))
        pos += 1

    # data[pos] is the blank separator; everything after it is the abstract.
    abstract = '\n'.join(data[pos + 1:])

    return {
        ID: article_id,
        TITLE: title,
        CREATED: creation_date,
        MODIFIED: modified_date,
        AUTHORS: authors,
        ABSTRACT: abstract,
    }
def read_arxiv_file(f: TextIO) -> ArxivType:
    """Return a ArxivType dictionary containing the arxiv metadata in f.

    f consists of article records, each terminated by a line containing
    only END. Leading and trailing whitespace is stripped from every line.
    Blank lines before the first line of a record (for example at the end
    of the file) are ignored. Raise ValueError if the last record is not
    terminated by END.

    The returned dict is ordered by article title; an article without a
    title is ordered as if its title were 100 'z' characters.

    Note we do not include example calls for functions that take open files.
    """
    id_to_article = {}
    record = []
    for raw_line in f:
        line = raw_line.strip()
        if line == END:
            article = extract_article_data(record)
            id_to_article[article[ID]] = article
            record = []
        elif line or record:
            # Inside a record blank lines are data (empty modified date,
            # author/abstract separator); before a record they are skipped.
            record.append(line)

    if record:
        raise ValueError(
            'article record starting with {!r} is not terminated by {}'
            .format(record[0], END))

    by_title = sorted(
        id_to_article.items(),
        key=lambda item: item[1][TITLE] if item[1][TITLE] else 'z' * 100)
    return dict(by_title)
# Add your helper functions for Task 2 here
################################################
## Task 3
################################################
def make_author_to_articles(id_to_article: ArxivType
                            ) -> Dict[NameType, List[str]]:
    """Return a dict mapping every author name that appears in
    id_to_article to the list of IDs of that author's articles, each list
    sorted in lexicographic order.

    >>> make_author_to_articles(EXAMPLE_ARXIV) == EXAMPLE_BY_AUTHOR
    True
    >>> make_author_to_articles({})
    {}
    """
    by_author = {}
    # Group the article IDs under each of their authors.
    for article_id in id_to_article:
        for name in id_to_article[article_id][AUTHORS]:
            by_author.setdefault(name, []).append(article_id)
    # Put every author's IDs in order.
    for article_ids in by_author.values():
        article_ids.sort()
    return by_author
def get_coauthors(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the names of everyone who shares at least one article in data
    with author, without duplicates and without author.

    The result is sorted by last name and then by first name.

    >>> get_coauthors(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo')]
    >>> get_coauthors(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    >>> get_coauthors({}, ('Ponce', 'Marcelo'))
    []
    """
    coauthors = set()
    for article in data.values():
        names = article[AUTHORS]
        if author in names:
            coauthors.update(name for name in names if name != author)
    # Names are (last, first) tuples, so plain tuple ordering sorts by last
    # name and breaks ties on the first name.
    return sorted(coauthors)
def get_most_published_authors(data: ArxivType) -> List[NameType]:
    """Return the names of the authors with the most articles in data.

    Every author tied for the highest number of articles is included. The
    result is sorted by last name and then by first name. Return [] when
    data contains no authors at all.

    >>> get_most_published_authors(EXAMPLE_ARXIV)
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> get_most_published_authors({})
    []
    """
    article_counts = {}
    for article in data.values():
        for name in article[AUTHORS]:
            article_counts[name] = article_counts.get(name, 0) + 1

    if not article_counts:
        # max() of an empty sequence raises ValueError.
        return []

    top_count = max(article_counts.values())
    return sorted(name for name, count in article_counts.items()
                  if count == top_count)
def suggest_collaborators(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the names of the coauthors of author's coauthors in data,
    leaving out author and anyone who already is a coauthor of author.

    The result has no duplicates and is sorted by last name and then by
    first name.

    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Pancer', 'Richard'))
    [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Pancer', 'Richard')]
    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    """
    direct = get_coauthors(data, author)
    # People who must never be suggested: the author and current coauthors.
    excluded = set(direct)
    excluded.add(author)

    suggestions = set()
    for coauthor in direct:
        suggestions.update(name for name in get_coauthors(data, coauthor)
                           if name not in excluded)
    return sorted(suggestions)
# Add your other functions for Task 3 here
################################################
## Task 4
################################################
# Add your Task 4 functions here
def has_prolific_authors(data: Dict[NameType, List[str]], id: str, min_number: int):
    """Return True if and only if at least one author in data has the
    article with identifier id among their articles and has min_number or
    more articles in total.

    data maps each author name to the list of IDs of that author's articles.

    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '008', 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '031', 2)
    False
    """
    return any(
        len(article_ids) >= min_number and id in article_ids
        for article_ids in data.values()
    )
def keep_prolific_authors(id_to_article: ArxivType,
                          min_publications: int) -> None:
    """Update id_to_article so that it contains only articles published by
    authors with min_publications or more articles published. As long
    as at least one of the authors has min_publications, the article
    is kept. An article without authors is removed.

    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 2)
    >>> len(arxiv_copy)
    3
    >>> '008' in arxiv_copy and '067' in arxiv_copy and '827' in arxiv_copy
    True
    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 3)
    >>> arxiv_copy
    {}
    """
    # Publication counts are taken before anything is removed.
    author_to_article = make_author_to_articles(id_to_article)

    # Collect the IDs first: a dict must not change size while it is being
    # iterated over.
    ids_to_remove = []
    for article_id, article in id_to_article.items():
        has_prolific = any(
            len(author_to_article[name]) >= min_publications
            for name in article[AUTHORS])
        if not has_prolific:
            ids_to_remove.append(article_id)

    for article_id in ids_to_remove:
        del id_to_article[article_id]
if __name__ == '__main__':
    # Run the docstring examples. doctest requires the examples to be
    # perfectly formatted.
    import doctest
    doctest.testmod()

    # Small data set: the file is expected to parse to EXAMPLE_ARXIV.
    # The with statement closes the file even if reading it fails.
    with open('example_data.txt') as example_data:
        example_arxiv = read_arxiv_file(example_data)
    if example_arxiv == EXAMPLE_ARXIV:
        print('The dictionary you produced matches EXAMPLE_ARXIV!')
        print('This is a good sign, but do more of your own testing!')
    else:
        # If you are getting this message, try setting a breakpoint on the
        # line that calls read_arxiv_file above and running the debugger
        print('Expected to get', EXAMPLE_ARXIV)
        print('But got', example_arxiv)

    # Larger data set.
    with open('data.txt') as data:
        arxiv = read_arxiv_file(data)
    author_to_articles = make_author_to_articles(arxiv)
    most_published = get_most_published_authors(arxiv)
    print(most_published)
    print(get_coauthors(arxiv, ('Varanasi', 'Mahesh K.')))  # one
    print(get_coauthors(arxiv, ('Chablat', 'Damien')))  # many
constants.py
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
from typing import Dict, List, Tuple, Union
# Keys of an article dict (see ArticleType below).
ID = 'identifier'
TITLE = 'title'
CREATED = 'created'
MODIFIED = 'modified'
AUTHORS = 'authors'
ABSTRACT = 'abstract'
# Text of the line that ends each article record in a metadata file.
END = 'END'
# We store names as tuples of two strs: (last-name, first-name(s)).
NameType = Tuple[str, str]
# ArticleValueType is the type for valid values in the ArticleType
# dict. All values are str, except for the value associated with
# key AUTHORS, which is a List of NameType.
# Note that we have not introduced Union - you can read it as "or".
ArticleValueType = Union[str, List[NameType]]
# ArticleType is a dict that maps keys ID, TITLE, CREATED, MODIFIED,
# AUTHORS, and ABSTRACT to their values (of type ArticleValueType).
ArticleType = Dict[str, ArticleValueType]
# ArxivType is a dict that maps article identifiers to articles,
# i.e. to values of type ArticleType.
ArxivType = Dict[str, ArticleType]
Create a Program to Implement Meta Data in Python Assignment Solution.

Instructions

Requirements and Specifications