summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <leonardomtenenbaum@gmail.com>2016-01-31 16:24:30 -0500
committerpommicket <leonardomtenenbaum@gmail.com>2016-01-31 16:24:30 -0500
commit921152df861a4e5a4a239e1e1bf555610bc7bb3e (patch)
tree56edaf50c2bc5f652e805ed76ba826b6fe8f31c2
parent79c9e6c2087c872017efe38b7b1389ea9886a30b (diff)
Created StringLearn
-rw-r--r--StringCreate/FileIO.h28
-rw-r--r--StringCreate/Makefile4
-rw-r--r--StringCreate/StringDoubleMap.h107
-rw-r--r--StringCreate/StringFunctions.h170
-rwxr-xr-xStringCreate/build/GNULinux/StringCreatebin0 -> 16992 bytes
-rw-r--r--StringCreate/build/Windows/StringCreate.exebin0 -> 46191 bytes
-rw-r--r--StringCreate/main.c164
-rwxr-xr-xbuild/GNULinux/StringLearnbin0 -> 16464 bytes
-rw-r--r--build/Windows/StringLearn.exebin0 -> 43498 bytes
-rw-r--r--src/FileIO.h28
-rw-r--r--src/Makefile6
-rw-r--r--src/StringFunctions.h137
-rw-r--r--src/StringIntMap.h107
-rw-r--r--src/main.c138
14 files changed, 889 insertions, 0 deletions
diff --git a/StringCreate/FileIO.h b/StringCreate/FileIO.h
new file mode 100644
index 0000000..d6239db
--- /dev/null
+++ b/StringCreate/FileIO.h
@@ -0,0 +1,28 @@
+// Size of an open file in bytes.
+// The stream is rewound to its beginning before returning.
+// Returns -1 if fp is NULL, if seeking or telling fails, or if the size
+// does not fit in an int.
+int fileSize(FILE* fp)
+{
+    long sz;
+
+    if (fp == NULL || fseek(fp, 0L, SEEK_END) != 0)
+        return -1;
+    sz = ftell(fp);
+    if (fseek(fp, 0L, SEEK_SET) != 0 || sz < 0)
+        return -1;
+    //ftell reports a long; refuse sizes that would be truncated by the int return type
+    if ((long)(int)sz != sz)
+        return -1;
+    return (int)sz;
+}
+
+
+// Contents of the file named fname as a NUL-terminated string.
+// The caller owns the returned buffer and releases it with free().
+// Returns NULL if the file cannot be opened or sized, or if memory runs out.
+char* fileContents(char* fname)
+{
+    FILE* fp = fopen(fname, "r");
+    if (fp == NULL)
+        return NULL;
+
+    int sz = fileSize(fp);
+    if (sz < 0)
+    {
+        fclose(fp);
+        return NULL;
+    }
+
+    //One extra byte for the terminator: callers run strlen() on the result
+    char* buffer = malloc((size_t)sz + 1);
+    if (buffer == NULL)
+    {
+        fclose(fp);
+        return NULL;
+    }
+
+    //Text-mode reads may deliver fewer bytes than the file size, so
+    //terminate at the number of bytes actually read
+    size_t got = fread(buffer, 1, (size_t)sz, fp);
+    buffer[got] = '\0';
+    fclose(fp);
+    return buffer;
+}
+
+// Replace the contents of the file named fname with the string s.
+// Failures to open, write or close are reported on stderr via perror().
+void fileWrite(char* fname, char* s)
+{
+    FILE* fp = fopen(fname, "w");
+    if (fp == NULL)
+    {
+        perror(fname);
+        return;
+    }
+
+    size_t len = strlen(s);
+    if (len > 0 && fwrite(s, 1, len, fp) != len)
+        perror(fname);
+    //fclose flushes buffered data, so a failure here also means lost output
+    if (fclose(fp) != 0)
+        perror(fname);
+}
diff --git a/StringCreate/Makefile b/StringCreate/Makefile
new file mode 100644
index 0000000..5fee6af
--- /dev/null
+++ b/StringCreate/Makefile
@@ -0,0 +1,4 @@
+CC=gcc
+default: StringCreate
+# main.c includes FileIO.h as well, so it is listed as a prerequisite to
+# trigger a rebuild when it changes.
+StringCreate: main.c StringFunctions.h StringDoubleMap.h FileIO.h
+	$(CC) -o StringCreate main.c
diff --git a/StringCreate/StringDoubleMap.h b/StringCreate/StringDoubleMap.h
new file mode 100644
index 0000000..0cda933
--- /dev/null
+++ b/StringCreate/StringDoubleMap.h
@@ -0,0 +1,107 @@
+// One map entry: a string key and the double stored under it.
+typedef struct StringDoublePair
+{
+ // Key string; the map functions store this pointer without copying it
+ char* s;
+ // Value associated with the key
+ double i;
+} StringDoublePair;
+
+// Growable array of StringDoublePair entries, searched linearly by key.
+typedef struct StringDoubleMap
+{
+ // Number of entries in use; also the slot the next mapAdd writes to
+ int index;
+ // Number of slots available in pairs
+ int capacity;
+ // Entry storage
+ StringDoublePair* pairs;
+} StringDoubleMap;
+
+// Create an empty map: no entries, no capacity, no storage.
+// Storage is obtained by resize() the first time mapAdd() is called.
+StringDoubleMap newStringDoubleMap()
+{
+    StringDoubleMap sdm;
+
+    sdm.index = 0;
+    sdm.capacity = 0;
+    //NULL rather than the address of a local, which would dangle
+    //as soon as this function returned
+    sdm.pairs = NULL;
+    return sdm;
+}
+
+// Give the map room for exactly `capacity` entries, keeping the existing
+// entries that still fit. Entries beyond the new capacity are dropped.
+// The program exits with an error message if memory cannot be allocated.
+void resize(StringDoubleMap* sdm, int capacity)
+{
+    StringDoublePair* newPairs;
+    int keep;
+    int i;
+
+    if (capacity < 0)
+        capacity = 0;
+
+    newPairs = malloc(sizeof(StringDoublePair)*capacity);
+    if (newPairs == NULL && capacity > 0)
+    {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+
+    //Only the slots in use hold meaningful data, and never copy more than fits
+    keep = sdm->index < capacity ? sdm->index : capacity;
+    for (i = 0; i < keep; i++)
+        newPairs[i] = sdm->pairs[i];
+
+    //A map that never had capacity does not own heap storage yet
+    if (sdm->capacity > 0)
+        free(sdm->pairs);
+
+    sdm->pairs = newPairs;
+    sdm->capacity = capacity;
+    sdm->index = keep;
+}
+
+// Append the pair (s, i) to the map, growing the storage when it is full.
+// The key pointer is stored as given; no copy of the string is made.
+void mapAdd(StringDoubleMap* sdm, char* s, double i)
+{
+    //Start with one slot, then double each time the array fills up
+    if (sdm->index >= sdm->capacity)
+        resize(sdm, sdm->capacity == 0 ? 1 : sdm->capacity*2);
+
+    StringDoublePair* slot = &sdm->pairs[sdm->index];
+    slot->s = s;
+    slot->i = i;
+    sdm->index++;
+}
+
+// Overwrite the value of the first entry whose key equals s.
+// Does nothing when no entry has that key.
+void mapSet(StringDoubleMap* sdm, char* s, double i)
+{
+    int pos = 0;
+    while (pos < sdm->index)
+    {
+        StringDoublePair* entry = &sdm->pairs[pos];
+        if (strEqual(entry->s, s))
+        {
+            entry->i = i;
+            return;
+        }
+        pos++;
+    }
+}
+
+// Add one to the value of the first entry whose key equals s.
+// Does nothing when no entry has that key.
+void mapInc(StringDoubleMap* sdm, char* s)
+{
+    int pos = 0;
+    while (pos < sdm->index)
+    {
+        StringDoublePair* entry = &sdm->pairs[pos];
+        if (strEqual(entry->s, s))
+        {
+            entry->i++;
+            return;
+        }
+        pos++;
+    }
+}
+
+// Value stored under key s, or -1 when the key is absent.
+// NOTE: the stored double is converted to int by the return type,
+// so any fractional part is discarded.
+int mapAt(StringDoubleMap sdm, char* s)
+{
+    int found = -1;
+    int pos = 0;
+    while (pos < sdm.index)
+    {
+        if (strEqual(sdm.pairs[pos].s, s))
+        {
+            found = (int)sdm.pairs[pos].i;
+            break;
+        }
+        pos++;
+    }
+    return found;
+}
+// Print every entry as "key:value", one per line, followed by two blank lines.
+void printMap(StringDoubleMap sdm)
+{
+    StringDoublePair* entry = sdm.pairs;
+    int remaining = sdm.index;
+    while (remaining > 0)
+    {
+        printf("%s:%f\n", entry->s, entry->i);
+        entry++;
+        remaining--;
+    }
+    printf("\n\n");
+}
+
+// 1 when some entry has key s, 0 otherwise.
+int mapContains(StringDoubleMap sdm, char* s)
+{
+    int pos = 0;
+    while (pos < sdm.index)
+    {
+        if (strEqual(sdm.pairs[pos].s, s))
+            return 1;
+        pos++;
+    }
+    return 0;
+}
diff --git a/StringCreate/StringFunctions.h b/StringCreate/StringFunctions.h
new file mode 100644
index 0000000..a34b58b
--- /dev/null
+++ b/StringCreate/StringFunctions.h
@@ -0,0 +1,170 @@
+// Whether string a is equal to string b: 1 when equal, 0 otherwise.
+int strEqual(char* a, char* b)
+{
+    //strcmp walks both strings once, instead of calling strlen on every iteration
+    return strcmp(a, b) == 0;
+}
+
+// The number of occurrences of c in a.
+int count(char* a, char c)
+{
+    int co = 0;
+    char* p;
+    for (p = a; *p != '\0'; p++)
+        if (*p == c)
+            co++;
+
+    //The original fell off the end without returning the tally
+    return co;
+}
+
+
+// A freshly allocated, NUL-terminated duplicate of s.
+// The caller releases it with free(). Returns NULL when memory runs out.
+char* copy(char* s)
+{
+    size_t len = strlen(s);
+    //One extra byte so the duplicate carries its terminator
+    char* c = malloc(len + 1);
+    if (c == NULL)
+        return NULL;
+    memcpy(c, s, len + 1);
+    return c;
+}
+
+// A freshly allocated copy of at most `length` characters of s, beginning
+// at index `start`. Copying stops early at the end of s, and a length of
+// zero or less yields an empty string without reading s at all.
+// The result is always heap memory, so the caller can always free() it.
+// Returns NULL when memory runs out.
+char* substring(char* s, int start, int length)
+{
+    int n = 0;
+    char* sub;
+
+    if (length < 0)
+        length = 0;
+    sub = malloc((size_t)length + 1);
+    if (sub == NULL)
+        return NULL;
+
+    //Stop at the terminator so an over-long length cannot read past the end of s
+    while (n < length && s[start + n] != '\0')
+    {
+        sub[n] = s[start + n];
+        n++;
+    }
+    sub[n] = '\0';
+    return sub;
+}
+
+// Index of the first occurrence of sub that lies entirely inside
+// s[start, end), or -1 when there is none.
+// start is clamped to 0 and end to the length of s.
+int indexOf(char* s, char* sub, int start, int end)
+{
+    int slen = strlen(s);
+    int sublen = strlen(sub);
+    char* result;
+    int position;
+
+    if (start < 0)
+        start = 0;
+    if (end > slen)
+        end = slen;
+    if (start > end)
+        return -1;
+
+    result = strstr(s + start, sub);
+    //strstr signals "not found" with NULL, which must not be turned into an index
+    if (result == NULL)
+        return -1;
+
+    position = (int)(result - s);
+    //The first match is the leftmost one; if it overruns end, every later one does too
+    if (position + sublen > end)
+        return -1;
+    return position;
+}
+
+// Index of the first c in s within [start, end), or -1 if there are no occurrences.
+int indexOfChar(char* s, char c, int start, int end)
+{
+    int pos = start;
+    while (pos < end)
+    {
+        if (s[pos] == c)
+            return pos;
+        pos++;
+    }
+    return -1;
+}
+
+// Position of the first string equal to str among the first strlistlen
+// entries of strlist, or -1 when none matches.
+int strListIndex(char** strlist, int strlistlen, char* str)
+{
+    int pos = 0;
+    while (pos < strlistlen)
+    {
+        if (strEqual(strlist[pos], str))
+            return pos;
+        pos++;
+    }
+    return -1;
+}
+
+
+// A freshly allocated string holding a followed by b.
+// The caller releases it with free().
+char* addStrings(char* a, char* b)
+{
+    int leftLen = strlen(a);
+    int rightLen = strlen(b);
+    char* joined = malloc(leftLen+rightLen+1);
+
+    memcpy(joined, a, leftLen);
+    memcpy(joined + leftLen, b, rightLen);
+    joined[leftLen+rightLen] = 0;
+    return joined;
+}
+
+// d formatted with "%f" in a freshly allocated string.
+// The caller releases it with free(). Returns NULL on failure.
+char* doubleToString(double d)
+{
+    //"%f" prints every integer digit, so a large magnitude needs far more
+    //than a fixed 64 bytes; measure first, then allocate exactly
+    int needed = snprintf(NULL, 0, "%f", d);
+    if (needed < 0)
+        return NULL;
+
+    char* x = malloc((size_t)needed + 1);
+    if (x == NULL)
+        return NULL;
+    snprintf(x, (size_t)needed + 1, "%f", d);
+    return x;
+}
+
+
+// Parse one "name:value" line into a pair.
+// The name is everything before the first ':' and is returned as a freshly
+// allocated string (NULL if memory runs out); the value is the text after
+// the colon converted with atof(). When the line has no colon the name is
+// empty and the whole line is converted as the value.
+StringDoublePair sdpFromString(char* s)
+{
+    StringDoublePair sdp;
+    char* colon = strchr(s, ':');
+    size_t nameLen = colon != NULL ? (size_t)(colon - s) : 0;
+    //atof reads straight from the line, so no temporary copy has to be made or freed
+    char* value = colon != NULL ? colon + 1 : s;
+
+    sdp.s = malloc(nameLen + 1);
+    if (sdp.s != NULL)
+    {
+        memcpy(sdp.s, s, nameLen);
+        sdp.s[nameLen] = '\0';
+    }
+    sdp.i = atof(value);
+    return sdp;
+}
+
+// Build a map from text made of "name:value" lines separated by '\n'.
+// Blank lines and lines with an empty name are skipped.
+StringDoubleMap sdmFromString(char* s)
+{
+    StringDoubleMap sdm = newStringDoubleMap();
+    int slen = strlen(s);
+    int index = 0;
+
+    while (index <= slen)
+    {
+        int end = indexOfChar(s, '\n', index, slen);
+        if (end < 0)
+            end = slen;
+
+        //Skip blank lines: they carry no entry
+        if (end > index)
+        {
+            char* line = substring(s, index, end-index);
+            StringDoublePair sdp = sdpFromString(line);
+            //The pair holds its own name string, so the line copy is no longer needed
+            free(line);
+
+            //An empty n-gram has no last character for nextChar to emit
+            if (sdp.s != NULL && sdp.s[0] != '\0')
+                mapAdd(&sdm, sdp.s, sdp.i);
+        }
+        index = end+1;
+    }
+
+    return sdm;
+}
+
+
+// Probability recorded for ngram in s, where s holds "ngram:probability"
+// lines separated by '\n'. Returns 0.5 when ngram has no line of its own.
+double getProbability(char* s, char* ngram)
+{
+    size_t nlen = strlen(ngram);
+    char* line = s;
+
+    while (line != NULL)
+    {
+        //Match only at the start of a line, so that "b" is not found inside "ab:..."
+        if (strncmp(line, ngram, nlen) == 0 && line[nlen] == ':')
+            return atof(line + nlen + 1); //atof stops by itself at the end of the number
+
+        line = strchr(line, '\n');
+        if (line != NULL)
+            line++;
+    }
+    return 0.5;
+}
+
+// A freshly allocated copy of s in which the probability on ngram's line is
+// replaced by newP (formatted with "%f"). When ngram has no line of its own,
+// "\n<ngram>:<newP>" is appended instead. s itself is not modified.
+// The caller releases the result with free(). Returns NULL on failure.
+char* replaceProbability(char* s, char* ngram, double newP)
+{
+    size_t slen = strlen(s);
+    size_t nlen = strlen(ngram);
+    char* hit = NULL;
+    char* line = s;
+    char* newS;
+
+    int vlen = snprintf(NULL, 0, "%f", newP);
+    if (vlen < 0)
+        return NULL;
+
+    //Match only at the start of a line, so that "b" is not found inside "ab:..."
+    while (line != NULL)
+    {
+        if (strncmp(line, ngram, nlen) == 0 && line[nlen] == ':')
+        {
+            hit = line;
+            break;
+        }
+        line = strchr(line, '\n');
+        if (line != NULL)
+            line++;
+    }
+
+    if (hit == NULL)
+    {
+        //s + '\n' + ngram + ':' + value + terminator
+        newS = malloc(slen + 1 + nlen + 1 + (size_t)vlen + 1);
+        if (newS == NULL)
+            return NULL;
+        sprintf(newS, "%s\n%s:%f", s, ngram, newP);
+        return newS;
+    }
+
+    size_t valueStart = (size_t)(hit - s) + nlen + 1;
+    char* eol = strchr(s + valueStart, '\n');
+    size_t valueEnd = eol != NULL ? (size_t)(eol - s) : slen;
+
+    newS = malloc(valueStart + (size_t)vlen + (slen - valueEnd) + 1);
+    if (newS == NULL)
+        return NULL;
+    memcpy(newS, s, valueStart);
+    snprintf(newS + valueStart, (size_t)vlen + 1, "%f", newP);
+    //The +1 carries the terminator of s across with the tail
+    memcpy(newS + valueStart + vlen, s + valueEnd, slen - valueEnd + 1);
+    return newS;
+}
diff --git a/StringCreate/build/GNULinux/StringCreate b/StringCreate/build/GNULinux/StringCreate
new file mode 100755
index 0000000..6557221
--- /dev/null
+++ b/StringCreate/build/GNULinux/StringCreate
Binary files differ
diff --git a/StringCreate/build/Windows/StringCreate.exe b/StringCreate/build/Windows/StringCreate.exe
new file mode 100644
index 0000000..db00cc9
--- /dev/null
+++ b/StringCreate/build/Windows/StringCreate.exe
Binary files differ
diff --git a/StringCreate/main.c b/StringCreate/main.c
new file mode 100644
index 0000000..a566453
--- /dev/null
+++ b/StringCreate/main.c
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "StringDoubleMap.h"
+#include "StringFunctions.h"
+#include "FileIO.h"
+#define DOCUMENTATION "StringCreate Documentation\nStringCreate is used to, given a .stl file from StringLearn, create a random string.\nUsage: %s <stlfile> [--stopX] -l length [-p new-ngram-chance]\nIf --stopX is enabled, StringCreate will stop when an X is reached.\n"
+#define USAGE "Usage: %s <stlfile> [--stopX] -l length [-p new-ngram-chance]\n"
+#define DEFAULT_NEW_NGRAM_CHANCE 0.5
+
+// Text of the .stl file, loaded in main() with fileContents()
+char* stlContents;
+// Weight given to emitting a random character instead of following a known
+// n-gram; set from -p, otherwise DEFAULT_NEW_NGRAM_CHANCE
+double newNgramChance;
+// Length given with -l
+int stringLength;
+// N-grams and their weights, parsed from stlContents by sdmFromString()
+StringDoubleMap ngrams;
+// Non-zero when --stopX was given on the command line
+int stopX;
+// Print the documentation text to standard output, naming the program as argv0.
+void displayHelp(char* argv0)
+{
+    fprintf(stdout, DOCUMENTATION, argv0);
+}
+
+// Print the usage line to standard error, naming the program as argv0.
+void usageError(char* argv0)
+{
+    //USAGE contains a %s conversion, so the program name must be passed along
+    fprintf(stderr, USAGE, argv0);
+}
+
+// A pseudo-random double in the half-open interval [0, 1).
+double rand01()
+{
+    //Dividing by RAND_MAX+1 keeps the result strictly below 1, so that
+    //randrange() can never return its exclusive upper bound
+    return (double)rand() / ((double)RAND_MAX + 1.0);
+}
+
+// A pseudo-random integer derived from rand01(), scaled to the span
+// between start and end and truncated toward zero.
+int randrange(int start, int end)
+{
+    int span = end-start;
+    double scaled = rand01()*span + start;
+    return (int)scaled;
+}
+
+// Whether all but the last character of ngram equal the characters of s
+// that end at index sz-1, i.e. whether ngram could continue s.
+static int ngramContinues(char* ngram, int ngramLen, char* s, int sz)
+{
+    //An empty n-gram has no last character to emit; a longer one cannot fit
+    if (ngramLen < 1 || ngramLen > (sz+1))
+        return 0;
+    return memcmp(ngram, s + (sz-ngramLen+1), ngramLen-1) == 0;
+}
+
+// Choose the character that follows the first sz characters of s.
+// Every n-gram that could continue s is a candidate weighted by its stored
+// value, alongside a "new n-gram" option weighted newNgramChance that
+// yields a random lowercase letter (or, with stopX set, possibly 'X').
+char nextChar(char* s, int sz)
+{
+    int i;
+    char* ngram;
+    int ngramLen;
+    double probabilitySum = newNgramChance;
+
+    //First pass: total weight of every option
+    for (i = 0; i < ngrams.index; i++)
+    {
+        ngram = ngrams.pairs[i].s;
+        if (ngramContinues(ngram, strlen(ngram), s, sz))
+            probabilitySum += ngrams.pairs[i].i;
+    }
+
+    double selectedNgram = probabilitySum * rand01();
+    if (selectedNgram < newNgramChance)
+    {
+        if (!stopX)
+            return (char)randrange('a', 'z'+1);
+
+        //One extra outcome just past 'z' stands for the stop marker
+        int n = randrange('a', 'z'+2);
+        return n == 'z'+1 ? 'X' : (char)n;
+    }
+
+    //Second pass: walk the same candidates until the running weight passes the draw
+    selectedNgram -= newNgramChance;
+    double sum = 0;
+    for (i = 0; i < ngrams.index; i++)
+    {
+        ngram = ngrams.pairs[i].s;
+        ngramLen = strlen(ngram);
+        if (!ngramContinues(ngram, ngramLen, s, sz))
+            continue;
+        sum += ngrams.pairs[i].i;
+        if (sum > selectedNgram)
+            return ngram[ngramLen-1];
+    }
+    return 'a';
+}
+
+// Generate a string one character at a time with nextChar().
+// Without stopX the result has exactly stringLength characters. With stopX
+// generation ends at the first 'X' drawn once at least stringLength
+// characters exist (earlier 'X' draws are discarded), or at 2047 characters.
+// The caller releases the result with free(). Returns NULL when memory runs out.
+char* generateString()
+{
+    char* string;
+    int i = 0;
+    if (!stopX)
+    {
+        string = malloc(stringLength+1);
+        if (string == NULL)
+            return NULL;
+        for (i = 0; i < stringLength; i++)
+            string[i] = nextChar(string, i);
+        string[stringLength] = 0;
+    }
+    else
+    {
+        char c;
+        string = malloc(2048);
+        if (string == NULL)
+            return NULL;
+        while (1)
+        {
+            c = nextChar(string, i);
+            if (c == 'X')
+            {
+                if (i >= stringLength)
+                    break;
+                else
+                    continue;
+            }
+            string[i] = c;
+            if ((++i) >= 2047)
+                break;
+        }
+        //i characters were stored; terminate right after them. The buffer holds
+        //no terminator before this point, so strlen() cannot be used to find the end
+        string[i] = 0;
+    }
+    return string;
+}
+
+// Entry point: parse the command line, load the .stl file, and print one
+// generated string. Returns 0 on success and 1 on a usage or I/O error.
+int main(int argc, char** argv)
+{
+    srand(time(NULL));
+    int i;
+
+    if (strListIndex(argv, argc, "--help") != -1)
+    {
+        displayHelp(argv[0]);
+        return 0;
+    }
+
+    if (argc < 4)
+    {
+        usageError(argv[0]);
+        return 1;
+    }
+
+    //-l is required, and neither -l nor -p may be the last argument
+    if (strListIndex(argv, argc, "-p") == argc-1 || strListIndex(argv, argc, "-l") == -1 || strListIndex(argv, argc, "-l") == argc-1)
+    {
+        usageError(argv[0]);
+        return 1;
+    }
+    stopX = strListIndex(argv, argc, "--stopX") != -1;
+
+    char* stlfilename = NULL;
+    newNgramChance = DEFAULT_NEW_NGRAM_CHANCE;
+    for (i = 1; i < argc; i++)
+    {
+        if (argv[i][0] == '-')
+            continue;
+        if (strEqual(argv[i-1], "-p"))
+            newNgramChance = atof(argv[i]);
+        else if (strEqual(argv[i-1], "-l"))
+            stringLength = atoi(argv[i]);
+        else
+            stlfilename = argv[i];
+    }
+
+    //No positional argument means there is no file to read
+    if (stlfilename == NULL)
+    {
+        usageError(argv[0]);
+        return 1;
+    }
+
+    stlContents = fileContents(stlfilename);
+    if (stlContents == NULL)
+    {
+        fprintf(stderr, "Error. Could not read %s\n", stlfilename);
+        return 1;
+    }
+    ngrams = sdmFromString(stlContents);
+
+    char* generated = generateString();
+    if (generated == NULL)
+    {
+        fprintf(stderr, "Error. Out of memory\n");
+        return 1;
+    }
+    printf("%s\n", generated);
+    free(generated);
+
+    return 0;
+
+}
diff --git a/build/GNULinux/StringLearn b/build/GNULinux/StringLearn
new file mode 100755
index 0000000..b2d104d
--- /dev/null
+++ b/build/GNULinux/StringLearn
Binary files differ
diff --git a/build/Windows/StringLearn.exe b/build/Windows/StringLearn.exe
new file mode 100644
index 0000000..4f69148
--- /dev/null
+++ b/build/Windows/StringLearn.exe
Binary files differ
diff --git a/src/FileIO.h b/src/FileIO.h
new file mode 100644
index 0000000..d6239db
--- /dev/null
+++ b/src/FileIO.h
@@ -0,0 +1,28 @@
+// Size of an open file in bytes.
+// The stream is rewound to its beginning before returning.
+// Returns -1 if fp is NULL, if seeking or telling fails, or if the size
+// does not fit in an int.
+int fileSize(FILE* fp)
+{
+    long sz;
+
+    if (fp == NULL || fseek(fp, 0L, SEEK_END) != 0)
+        return -1;
+    sz = ftell(fp);
+    if (fseek(fp, 0L, SEEK_SET) != 0 || sz < 0)
+        return -1;
+    //ftell reports a long; refuse sizes that would be truncated by the int return type
+    if ((long)(int)sz != sz)
+        return -1;
+    return (int)sz;
+}
+
+
+// Contents of the file named fname as a NUL-terminated string.
+// The caller owns the returned buffer and releases it with free().
+// Returns NULL if the file cannot be opened or sized, or if memory runs out.
+char* fileContents(char* fname)
+{
+    FILE* fp = fopen(fname, "r");
+    if (fp == NULL)
+        return NULL;
+
+    int sz = fileSize(fp);
+    if (sz < 0)
+    {
+        fclose(fp);
+        return NULL;
+    }
+
+    //One extra byte for the terminator: callers run strlen() on the result
+    char* buffer = malloc((size_t)sz + 1);
+    if (buffer == NULL)
+    {
+        fclose(fp);
+        return NULL;
+    }
+
+    //Text-mode reads may deliver fewer bytes than the file size, so
+    //terminate at the number of bytes actually read
+    size_t got = fread(buffer, 1, (size_t)sz, fp);
+    buffer[got] = '\0';
+    fclose(fp);
+    return buffer;
+}
+
+// Replace the contents of the file named fname with the string s.
+// Failures to open, write or close are reported on stderr via perror().
+void fileWrite(char* fname, char* s)
+{
+    FILE* fp = fopen(fname, "w");
+    if (fp == NULL)
+    {
+        perror(fname);
+        return;
+    }
+
+    size_t len = strlen(s);
+    if (len > 0 && fwrite(s, 1, len, fp) != len)
+        perror(fname);
+    //fclose flushes buffered data, so a failure here also means lost output
+    if (fclose(fp) != 0)
+        perror(fname);
+}
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..de0eb8a
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,6 @@
+CC=gcc
+# Libraries are resolved left to right on the link line, so -lm has to come
+# after main.c; it therefore lives in LDLIBS rather than CFLAGS.
+LDLIBS=-lm
+default: StringLearn
+
+StringLearn: main.c FileIO.h StringFunctions.h StringIntMap.h
+	$(CC) $(CFLAGS) -o StringLearn main.c $(LDLIBS)
diff --git a/src/StringFunctions.h b/src/StringFunctions.h
new file mode 100644
index 0000000..202621b
--- /dev/null
+++ b/src/StringFunctions.h
@@ -0,0 +1,137 @@
+// Whether string a is equal to string b: 1 when equal, 0 otherwise.
+int strEqual(char* a, char* b)
+{
+    //strcmp walks both strings once, instead of calling strlen on every iteration
+    return strcmp(a, b) == 0;
+}
+
+// The number of occurrences of c in a.
+int count(char* a, char c)
+{
+    int co = 0;
+    char* p;
+    for (p = a; *p != '\0'; p++)
+        if (*p == c)
+            co++;
+
+    //The original fell off the end without returning the tally
+    return co;
+}
+
+
+// A freshly allocated, NUL-terminated duplicate of s.
+// The caller releases it with free(). Returns NULL when memory runs out.
+char* copy(char* s)
+{
+    size_t len = strlen(s);
+    //One extra byte so the duplicate carries its terminator
+    char* c = malloc(len + 1);
+    if (c == NULL)
+        return NULL;
+    memcpy(c, s, len + 1);
+    return c;
+}
+
+// A freshly allocated copy of at most `length` characters of s, beginning
+// at index `start`. Copying stops early at the end of s, and a length of
+// zero or less yields an empty string without reading s at all.
+// The caller releases the result with free(). Returns NULL when memory runs out.
+char* substring(char* s, int start, int length)
+{
+    int n = 0;
+    char* sub;
+
+    //A negative length must not reach malloc or the copy loop
+    if (length < 0)
+        length = 0;
+    sub = malloc((size_t)length + 1);
+    if (sub == NULL)
+        return NULL;
+
+    //Stop at the terminator so an over-long length cannot read past the end of s
+    while (n < length && s[start + n] != '\0')
+    {
+        sub[n] = s[start + n];
+        n++;
+    }
+    sub[n] = '\0';
+    return sub;
+}
+
+// Index of the first occurrence of sub that lies entirely inside
+// s[start, end), or -1 when there is none.
+// start is clamped to 0 and end to the length of s.
+int indexOf(char* s, char* sub, int start, int end)
+{
+    int slen = strlen(s);
+    int sublen = strlen(sub);
+    char* result;
+    int position;
+
+    if (start < 0)
+        start = 0;
+    if (end > slen)
+        end = slen;
+    if (start > end)
+        return -1;
+
+    result = strstr(s + start, sub);
+    //strstr signals "not found" with NULL, which must not be turned into an index
+    if (result == NULL)
+        return -1;
+
+    position = (int)(result - s);
+    //The first match is the leftmost one; if it overruns end, every later one does too
+    if (position + sublen > end)
+        return -1;
+    return position;
+}
+
+// Index of the first c in s within [start, end), or -1 if there are no occurrences.
+int indexOfChar(char* s, char c, int start, int end)
+{
+    int pos = start;
+    while (pos < end)
+    {
+        if (s[pos] == c)
+            return pos;
+        pos++;
+    }
+    return -1;
+}
+
+// Position of the first string equal to str among the first strlistlen
+// entries of strlist, or -1 when none matches.
+int strListIndex(char** strlist, int strlistlen, char* str)
+{
+    int pos = 0;
+    while (pos < strlistlen)
+    {
+        if (strEqual(strlist[pos], str))
+            return pos;
+        pos++;
+    }
+    return -1;
+}
+
+
+// A freshly allocated string holding a followed by b.
+// The caller releases it with free().
+char* addStrings(char* a, char* b)
+{
+    int leftLen = strlen(a);
+    int rightLen = strlen(b);
+    char* joined = malloc(leftLen+rightLen+1);
+
+    memcpy(joined, a, leftLen);
+    memcpy(joined + leftLen, b, rightLen);
+    joined[leftLen+rightLen] = 0;
+    return joined;
+}
+
+// d formatted with "%f" in a freshly allocated string.
+// The caller releases it with free(). Returns NULL on failure.
+char* doubleToString(double d)
+{
+    //"%f" prints every integer digit, so a large magnitude needs far more
+    //than a fixed 64 bytes; measure first, then allocate exactly
+    int needed = snprintf(NULL, 0, "%f", d);
+    if (needed < 0)
+        return NULL;
+
+    char* x = malloc((size_t)needed + 1);
+    if (x == NULL)
+        return NULL;
+    snprintf(x, (size_t)needed + 1, "%f", d);
+    return x;
+}
+
+
+// Probability recorded for ngram in s, where s holds "ngram:probability"
+// lines separated by '\n'. Returns 0.5 when ngram has no line of its own.
+double getProbability(char* s, char* ngram)
+{
+    size_t nlen = strlen(ngram);
+    char* line = s;
+
+    while (line != NULL)
+    {
+        //Match only at the start of a line, so that "b" is not found inside "ab:..."
+        if (strncmp(line, ngram, nlen) == 0 && line[nlen] == ':')
+            return atof(line + nlen + 1); //atof stops by itself at the end of the number
+
+        line = strchr(line, '\n');
+        if (line != NULL)
+            line++;
+    }
+    return 0.5;
+}
+
+// A freshly allocated copy of s in which the probability on ngram's line is
+// replaced by newP (formatted with "%f"). When ngram has no line of its own,
+// "\n<ngram>:<newP>" is appended instead. s itself is not modified.
+// The caller releases the result with free(). Returns NULL on failure.
+char* replaceProbability(char* s, char* ngram, double newP)
+{
+    size_t slen = strlen(s);
+    size_t nlen = strlen(ngram);
+    char* hit = NULL;
+    char* line = s;
+    char* newS;
+
+    int vlen = snprintf(NULL, 0, "%f", newP);
+    if (vlen < 0)
+        return NULL;
+
+    //Match only at the start of a line, so that "b" is not found inside "ab:..."
+    while (line != NULL)
+    {
+        if (strncmp(line, ngram, nlen) == 0 && line[nlen] == ':')
+        {
+            hit = line;
+            break;
+        }
+        line = strchr(line, '\n');
+        if (line != NULL)
+            line++;
+    }
+
+    if (hit == NULL)
+    {
+        //s + '\n' + ngram + ':' + value + terminator
+        newS = malloc(slen + 1 + nlen + 1 + (size_t)vlen + 1);
+        if (newS == NULL)
+            return NULL;
+        sprintf(newS, "%s\n%s:%f", s, ngram, newP);
+        return newS;
+    }
+
+    size_t valueStart = (size_t)(hit - s) + nlen + 1;
+    char* eol = strchr(s + valueStart, '\n');
+    size_t valueEnd = eol != NULL ? (size_t)(eol - s) : slen;
+
+    newS = malloc(valueStart + (size_t)vlen + (slen - valueEnd) + 1);
+    if (newS == NULL)
+        return NULL;
+    memcpy(newS, s, valueStart);
+    snprintf(newS + valueStart, (size_t)vlen + 1, "%f", newP);
+    //The +1 carries the terminator of s across with the tail
+    memcpy(newS + valueStart + vlen, s + valueEnd, slen - valueEnd + 1);
+    return newS;
+}
diff --git a/src/StringIntMap.h b/src/StringIntMap.h
new file mode 100644
index 0000000..77f227b
--- /dev/null
+++ b/src/StringIntMap.h
@@ -0,0 +1,107 @@
+// One map entry: a string key and the int stored under it.
+typedef struct StringIntPair
+{
+ // Key string; the map functions store this pointer without copying it
+ char* s;
+ // Value associated with the key
+ int i;
+} StringIntPair;
+
+// Growable array of StringIntPair entries, searched linearly by key.
+typedef struct StringIntMap
+{
+ // Number of entries in use; also the slot the next mapAdd writes to
+ int index;
+ // Number of slots available in pairs
+ int capacity;
+ // Entry storage
+ StringIntPair* pairs;
+} StringIntMap;
+
+// Create an empty map: no entries, no capacity, no storage.
+// Storage is obtained by resize() the first time mapAdd() is called.
+StringIntMap newStringIntMap()
+{
+    StringIntMap sim;
+
+    sim.index = 0;
+    sim.capacity = 0;
+    //NULL rather than the address of a local, which would dangle
+    //as soon as this function returned
+    sim.pairs = NULL;
+    return sim;
+}
+
+// Give the map room for exactly `capacity` entries, keeping the existing
+// entries that still fit. Entries beyond the new capacity are dropped.
+// The program exits with an error message if memory cannot be allocated.
+void resize(StringIntMap* sim, int capacity)
+{
+    StringIntPair* newPairs;
+    int keep;
+    int i;
+
+    if (capacity < 0)
+        capacity = 0;
+
+    newPairs = malloc(sizeof(StringIntPair)*capacity);
+    if (newPairs == NULL && capacity > 0)
+    {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+
+    //Only the slots in use hold meaningful data, and never copy more than fits
+    keep = sim->index < capacity ? sim->index : capacity;
+    for (i = 0; i < keep; i++)
+        newPairs[i] = sim->pairs[i];
+
+    //A map that never had capacity does not own heap storage yet
+    if (sim->capacity > 0)
+        free(sim->pairs);
+
+    sim->pairs = newPairs;
+    sim->capacity = capacity;
+    sim->index = keep;
+}
+
+// Append the pair (s, i) to the map, growing the storage when it is full.
+// The key pointer is stored as given; no copy of the string is made.
+void mapAdd(StringIntMap* sim, char* s, int i)
+{
+    //Start with one slot, then double each time the array fills up
+    if (sim->index >= sim->capacity)
+        resize(sim, sim->capacity == 0 ? 1 : sim->capacity*2);
+
+    StringIntPair* slot = &sim->pairs[sim->index];
+    slot->s = s;
+    slot->i = i;
+    sim->index++;
+}
+
+// Overwrite the value of the first entry whose key equals s.
+// Does nothing when no entry has that key.
+void mapSet(StringIntMap* sim, char* s, int i)
+{
+    int pos = 0;
+    while (pos < sim->index)
+    {
+        StringIntPair* entry = &sim->pairs[pos];
+        if (strEqual(entry->s, s))
+        {
+            entry->i = i;
+            return;
+        }
+        pos++;
+    }
+}
+
+// Add one to the value of the first entry whose key equals s.
+// Does nothing when no entry has that key.
+void mapInc(StringIntMap* sim, char* s)
+{
+    int pos = 0;
+    while (pos < sim->index)
+    {
+        StringIntPair* entry = &sim->pairs[pos];
+        if (strEqual(entry->s, s))
+        {
+            entry->i++;
+            return;
+        }
+        pos++;
+    }
+}
+
+// Value stored under key s, or -1 when the key is absent.
+int mapAt(StringIntMap sim, char* s)
+{
+    int found = -1;
+    int pos = 0;
+    while (pos < sim.index)
+    {
+        if (strEqual(sim.pairs[pos].s, s))
+        {
+            found = sim.pairs[pos].i;
+            break;
+        }
+        pos++;
+    }
+    return found;
+}
+// Print every entry as "key:value", one per line, followed by two blank lines.
+void printMap(StringIntMap sim)
+{
+    StringIntPair* entry = sim.pairs;
+    int remaining = sim.index;
+    while (remaining > 0)
+    {
+        printf("%s:%d\n", entry->s, entry->i);
+        entry++;
+        remaining--;
+    }
+    printf("\n\n");
+}
+
+// 1 when some entry has key s, 0 otherwise.
+int mapContains(StringIntMap sim, char* s)
+{
+    int pos = 0;
+    while (pos < sim.index)
+    {
+        if (strEqual(sim.pairs[pos].s, s))
+            return 1;
+        pos++;
+    }
+    return 0;
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..9f9043a
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,138 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "StringFunctions.h"
+#include "FileIO.h"
+#include "StringIntMap.h"
+#define DEFAULT_NGRAM_LENGTH 5
+#define DEFAULT_POWER 0.75
+
+// Text of the .stl file; loaded in main() and replaced after each n-gram update
+char* stlContents;
+// Exponent applied to an n-gram's usage count in benefitOf(); set from -p,
+// otherwise DEFAULT_POWER
+double power;
+// Rating given with -r; multiplies the benefit in benefitOf()
+double rating;
+
+// Print the StringLearn documentation to standard output.
+void displayHelp()
+{
+    fputs("StringLearn documentation\nStringLearn usage: StringLearn <string> -r <rating> -s <stlfile>\n", stdout);
+}
+
+// Print the usage line to standard error, naming the program as argv0.
+// Returns 1, suitable as a failing exit status.
+int usageError(char* argv0)
+{
+    fprintf(stderr, "Error. Usage: %s <string> -r <rating> -s <stlfile>\n", argv0);
+    //The function is declared int, so it has to return a value
+    return 1;
+}
+
+// Logistic function: 1 / (1 + e^-z).
+double sigmoid(double z)
+{
+    double denominator = 1.0+exp(-z);
+    return 1.0/denominator;
+}
+
+// Inverse of the logistic function: log(z / (1 - z)).
+// The endpoints, where the logarithm is unbounded, map to fixed
+// values instead: 1 gives 10000 and 0 gives -10000.
+double inverseSigmoid(double z)
+{
+    double result;
+    if (z == 1)
+        result = 10000;
+    else if (z == 0)
+        result = -10000;
+    else
+        result = log(z/(1.0-z));
+    return result;
+}
+
+// Benefit of an n-gram used `usage` times: rating * usage^power.
+double benefitOf(int usage)
+{
+    double scaledUsage = pow(usage, power);
+    return rating*scaledUsage;
+}
+
+// Updated probability for an n-gram: the current probability is mapped
+// through inverseSigmoid(), shifted by benefitOf(usage), and mapped back
+// through sigmoid().
+double newProbability(double currentProbability, int usage)
+{
+    double shifted = benefitOf(usage)+inverseSigmoid(currentProbability);
+    return sigmoid(shifted);
+}
+
+// Entry point: parse the command line, count the n-grams of the given
+// string, update each one's probability in the .stl file contents and
+// write the file back. Returns 0 on success, 1 on a usage or I/O error.
+int main(int argc, char** argv)
+{
+    int i;
+    if (strListIndex(argv, argc, "--help") != -1)
+    {
+        displayHelp();
+        return 0;
+    }
+
+    int dashrIndex = strListIndex(argv, argc, "-r");
+    int dashsIndex = strListIndex(argv, argc, "-s");
+    int dashlIndex = strListIndex(argv, argc, "-l");
+    int dashpIndex = strListIndex(argv, argc, "-p");
+    if (argc < 6 || dashrIndex == -1 || dashsIndex == -1 || dashrIndex == argc-1 || dashsIndex == argc-1 || dashlIndex == argc-1 || dashpIndex == argc-1)
+    {
+        //Stop here: carrying on would use arguments that were never supplied
+        usageError(argv[0]);
+        return 1;
+    }
+
+    char* string = NULL;
+    char* stlFilename = NULL;
+    int ngramLength = DEFAULT_NGRAM_LENGTH;
+    power = DEFAULT_POWER;
+    for (i = 1; i < argc; i++)
+    {
+        //Skip option names, but let a negative rating such as "-1" through after -r
+        if (argv[i][0] == '-' && !strEqual(argv[i-1], "-r"))
+            continue;
+
+        if (strEqual(argv[i-1], "-r"))
+        {
+            rating = atof(argv[i]);
+        }
+        else if (strEqual(argv[i-1], "-s"))
+            stlFilename = argv[i];
+
+        else if (strEqual(argv[i-1], "-l"))
+            ngramLength = atoi(argv[i]);
+
+        else if (strEqual(argv[i-1], "-p"))
+            power = atof(argv[i]);
+
+        else
+            string = argv[i];
+    }
+
+    if (string == NULL || stlFilename == NULL)
+    {
+        usageError(argv[0]);
+        return 1;
+    }
+
+    //A rating of zero cannot change any probability, so there is nothing to do
+    if (rating == 0)
+        return 0;
+
+
+    stlContents = fileContents(stlFilename);
+    if (stlContents == NULL)
+    {
+        fprintf(stderr, "Error. Could not read %s\n", stlFilename);
+        return 1;
+    }
+
+    StringIntMap usage = newStringIntMap();
+    int slen = strlen(string);
+
+    int ngramLen;
+
+    char* ngram;
+    //Count how often every n-gram of length 1..ngramLength occurs in the string
+    for (ngramLen = 1; ngramLen <= ngramLength; ngramLen++)
+    {
+        for (i = 0; i <= slen-ngramLen; i++)
+        {
+            ngram = substring(string, i, ngramLen);
+
+            if (mapContains(usage, ngram))
+            {
+                mapInc(&usage, ngram);
+                //The map already holds its own copy of this key
+                free(ngram);
+                continue;
+            }
+            mapAdd(&usage, ngram, 1);
+        }
+    }
+
+    int currentUsage;
+    double np, prob;
+
+    for (i = 0; i < usage.index; i++)
+    {
+
+        ngram = usage.pairs[i].s;
+
+        currentUsage = usage.pairs[i].i;
+        prob = getProbability(stlContents, ngram);
+        np = newProbability(prob, currentUsage);
+
+        char* updated = replaceProbability(stlContents, ngram, np);
+        if (updated == NULL)
+        {
+            fprintf(stderr, "Error. Out of memory\n");
+            return 1;
+        }
+        //replaceProbability returns a new buffer; release the one it superseded
+        free(stlContents);
+        stlContents = updated;
+    }
+
+    fileWrite(stlFilename, stlContents);
+
+    return 0;
+
+}